# ComprehendController.py
import csv
import imaplib
import logging
import os
import shutil
import smtplib
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText

import boto3
from boto3 import Session
from django.views import View
from django.views.generic import TemplateView

from model.models import EmailSortModel, EmailConfigModel
from object.ResponseObject import ResponseObject
from Utils.NeteaseMail import NeteaseMail
  15. class ComprehendView(TemplateView):
  16. def get(self, request, *args, **kwargs):
  17. request.encoding = 'utf-8'
  18. operation = kwargs.get('operation')
  19. return self.validation(request.GET, request, operation)
  20. def post(self, request, *args, **kwargs):
  21. request.encoding = 'utf-8'
  22. operation = kwargs.get('operation')
  23. return self.validation(request.POST, request, operation)
  24. def validation(self, request_dict, request, operation):
  25. response = ResponseObject()
  26. if operation is None:
  27. return response.json(444, 'error path')
  28. else:
  29. if operation == 'createSort':
  30. return self.createSort(request_dict, response)
  31. elif operation == 'doStartSortWord':
  32. return self.doStartSortWord(request_dict, response)
  33. elif operation == 'write_csv':
  34. return self.write_csv(request_dict, response)
  35. elif operation == 'upload_csv':
  36. return self.upload_csv(request_dict, response)
  37. elif operation == 'create_document_classifier':
  38. return self.create_document_classifier(request_dict, response)
  39. elif operation == 'create_endpoint':
  40. return self.create_endpoint(request_dict, response)
  41. elif operation == 'describe_document_classifier':
  42. return self.describe_document_classifier(request_dict, response)
  43. elif operation == 'describe_endpoint':
  44. return self.describe_endpoint(request_dict, response)
  45. elif operation == 'delete_endpoint':
  46. return self.delete_endpoint(request_dict, response)
  47. elif operation == 'delete_document_classifier':
  48. return self.delete_document_classifier(request_dict, response)
  49. elif operation == 'getDocumentClassifier':
  50. return self.get_document_classifier(request_dict, response)
  51. elif operation == 'email':
  52. return self.email(request_dict, response)
  53. def createSort(self, request_dict, response):
  54. client = boto3.client('comprehend', region_name='region')
  55. # Create a document classifier
  56. create_response = client.create_document_classifier(
  57. InputDataConfig={
  58. 'S3Uri': 's3://S3Bucket/docclass/file name'
  59. },
  60. DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
  61. DocumentClassifierName='SampleCodeClassifier1',
  62. LanguageCode='en'
  63. )
  64. print("Create response: %s\n", create_response)
  65. # Check the status of the classifier
  66. describe_response = client.describe_document_classifier(
  67. DocumentClassifierArn=create_response['DocumentClassifierArn'])
  68. print("Describe response: %s\n", describe_response)
  69. # List all classifiers in account
  70. list_response = client.list_document_classifiers()
  71. print("List response: %s\n", list_response)
  72. def doStartSortWord(self, request_dict, response):
  73. client = boto3.client('comprehend', region_name='region')
  74. start_response = client.start_document_classification_job(
  75. InputDataConfig={
  76. 'S3Uri': 's3://srikad-us-west-2-input/docclass/file name',
  77. 'InputFormat': 'ONE_DOC_PER_LINE'
  78. },
  79. OutputDataConfig={
  80. 'S3Uri': 's3://S3Bucket/output'
  81. },
  82. DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
  83. DocumentClassifierArn=
  84. 'arn:aws:comprehend:region:account number:document-classifier/SampleCodeClassifier1'
  85. )
  86. print("Start response: %s\n", start_response)
  87. # Check the status of the job
  88. describe_response = client.describe_document_classification_job(JobId=start_response['JobId'])
  89. print("Describe response: %s\n", describe_response)
  90. # List all classification jobs in account
  91. list_response = client.list_document_classification_jobs()
  92. print("List response: %s\n", list_response)
  93. def write_csv(self, request_dict, response):
  94. file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/emailtrain.csv')
  95. with open(file_path, "w", newline='') as f:
  96. sort = EmailSortModel.objects.values('sort', 'clientquestion')
  97. for a in sort:
  98. text = [a['sort'], a['clientquestion']]
  99. csv_writer = csv.writer(f)
  100. for i in range(50):
  101. csv_writer.writerow(text)
  102. return response.json(0)
  103. def upload_csv(self, request_dict, response):
  104. file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/emailtrain.csv')
  105. bucket_name = "asj-amazon-comprehend"
  106. upload_key = "location/emailtrain.csv"
  107. with open(file_path, "rb") as f:
  108. NeteaseMail().s3_login().upload_fileobj(f, bucket_name, Key=upload_key)
  109. return response.json(0)
  110. def create_document_classifier(self, request_dict, response):
  111. try:
  112. response1 = NeteaseMail().aws_login().create_document_classifier(
  113. DocumentClassifierName='lhq',
  114. DataAccessRoleArn='arn:aws:iam::697864307463:role/service-role/AmazonComprehendServiceRoleS3FullAccess-admin',
  115. # Tags=[{'Key': 'string','Value': 'string'},],
  116. InputDataConfig={
  117. 'DataFormat': 'COMPREHEND_CSV',
  118. # | 'AUGMENTED_MANIFEST',
  119. 'S3Uri': 's3://asj-amazon-comprehend/location/emailtrain.csv',
  120. # 'LabelDelimiter': 'string',
  121. # 'AugmentedManifests': [
  122. # {
  123. # 'S3Uri': 'string',
  124. # 'AttributeNames': ['string', ]
  125. # },
  126. # ]
  127. },
  128. OutputDataConfig={
  129. 'S3Uri': 's3://asj-amazon-comprehend/output/lhq/'},
  130. # 'KmsKeyId': 'string'},
  131. # ClientRequestToken='string',
  132. LanguageCode='en',
  133. # | 'es' | 'fr' | 'de' | 'it' | 'pt' | 'ar'
  134. # | 'hi' | 'ja' | 'ko' | 'zh' | 'zh-TW',
  135. # VolumeKmsKeyId='string',
  136. # VpcConfig={
  137. # 'SecurityGroupIds': ['string', ],
  138. # 'Subnets': ['string', ]
  139. # },
  140. Mode='MULTI_CLASS' # 'MULTI_LABEL'
  141. )
  142. except Exception as e:
  143. return response.json(404, repr('模型正在训练或已存在'))
  144. else:
  145. print(response1)
  146. return response.json(0)
  147. def create_endpoint(self, request_dict, response):
  148. try:
  149. response1 = NeteaseMail().aws_login().create_endpoint(
  150. EndpointName='lhq',
  151. ModelArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier/lhq',
  152. # Document classifier arn
  153. DesiredInferenceUnits=1,
  154. # ClientRequestToken='string',
  155. # Tags=[{'Key': 'string','Value': 'string'},]
  156. )
  157. except Exception as e:
  158. return response.json(404, repr('模型正在训练或不存在,请等待模型训练完再创建端点'))
  159. else:
  160. print(response1)
  161. return response.json(0)
  162. def describe_document_classifier(self, request_dict, response):
  163. try:
  164. # classifier_name = request_dict.get('classifier_name', None)
  165. a = NeteaseMail().aws_arn()[0]
  166. # classifier_arn = a + str(classifier_name)
  167. classifier_arn = a + 'lhq'
  168. response1 = NeteaseMail().aws_login().describe_document_classifier(
  169. DocumentClassifierArn=classifier_arn
  170. )
  171. a1 = {"Status": response1['DocumentClassifierProperties']['Status']}
  172. except Exception as e:
  173. return response.json(404, repr('模型不存在'))
  174. else:
  175. return response.json(0, a1)
  176. def describe_endpoint(self, request_dict, response):
  177. try:
  178. # endpoint_name = request_dict.get('endpoint_name', None)
  179. a = NeteaseMail().aws_arn()[1]
  180. # endpoint_arn = a + str(endpoint_name)
  181. endpoint_arn = a + 'lhq'
  182. response1 = NeteaseMail().aws_login().describe_endpoint(
  183. EndpointArn=endpoint_arn
  184. )
  185. a1 = {"Status": response1['EndpointProperties']['Status']}
  186. except Exception as e:
  187. return response.json(404, repr('端点不存在'))
  188. else:
  189. return response.json(0, a1)
  190. def delete_endpoint(self, request_dict, response):
  191. try:
  192. # endpoint_name = request_dict.get('endpoint_name', None)
  193. a = NeteaseMail().aws_arn()[1]
  194. # endpoint_arn = a + str(endpoint_name)
  195. endpoint_arn = a + 'lhq'
  196. response1 = NeteaseMail().aws_login().delete_endpoint(
  197. EndpointArn=endpoint_arn)
  198. except Exception as e:
  199. return response.json(404, repr('端点不存在或正在删除中'))
  200. else:
  201. return response.json(0)
  202. def delete_document_classifier(self, request_dict, response):
  203. try:
  204. # classifier_name = request_dict.get('classifier_name', None)
  205. a = NeteaseMail().aws_arn()[0]
  206. # classifier_arn = a + str(classifier_name)
  207. classifier_arn = a + 'lhq'
  208. response1 = NeteaseMail().aws_login().delete_document_classifier(
  209. DocumentClassifierArn=classifier_arn)
  210. except Exception as e:
  211. return response.json(404, repr('模型不存在或正在删除中'))
  212. else:
  213. return response.json(0)
  214. def get_document_classifier(self, request_dict, response):
  215. ec_qs = EmailConfigModel.objects.filter(emailtag=1) # 获取总邮件
  216. emailsum = 0
  217. replyemailsum = 0
  218. for ec in ec_qs:
  219. email_server = NeteaseMail().loginEmail(ec.emailserver, ec.fromaddr, ec.password, ec.emailserverport)
  220. subject_list, from_list, bodydata_list, uid_list, results, email_body, fujian_list, image_list = NeteaseMail().getEmailContext(
  221. email_server)
  222. emailsum += len(results)
  223. print("标题:", len(subject_list), subject_list)
  224. print("发件人:", len(from_list), from_list)
  225. print("邮件内容:", len(bodydata_list), bodydata_list)
  226. print("邮件uid:", len(uid_list), uid_list)
  227. print("附件判断:", len(fujian_list), fujian_list)
  228. print('图片数量:', len(image_list), image_list)
  229. if not bodydata_list:
  230. print("邮箱内无未读邮件")
  231. else:
  232. print('成功拿到邮件数据')
  233. for i, v in enumerate(subject_list):
  234. print('-------------------------------------------------------------------------')
  235. print("邮件标题:", subject_list[i])
  236. if not bodydata_list[i] or not subject_list[i]:
  237. print("邮件无正文或无标题,不进行回复")
  238. continue
  239. if fujian_list[i] == 'have':
  240. reply_model = bodydata_list[i]
  241. image = image_list
  242. shoujianren = 'liehaoquan2021@163.com'
  243. fajianren = from_list[i].split()[1]
  244. NeteaseMail().sentemail(None, uid_list[i],reply_model, image, subject_list[i], fajianren, shoujianren,
  245. ec.fromaddr, ec.password)
  246. file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
  247. shutil.rmtree(file_path)
  248. os.mkdir(file_path)
  249. print('邮件含有图片或附件,已转发到zendesk')
  250. email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
  251. continue
  252. response1 = NeteaseMail().aws_login().classify_document(
  253. Text=bodydata_list[i].split('\n\n\n\n\n')[0],
  254. EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/lhq'
  255. # EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/test'
  256. )
  257. # print(response.index(max(response['Classes']['Score'])))
  258. nums = []
  259. print('emailsort:')
  260. for cls in response1['Classes']:
  261. print(cls['Name'])
  262. nums.append(cls['Score'])
  263. print("识别种类及命中率:", response1['Classes'][nums.index(max(nums))])
  264. if response1['Classes'][nums.index(max(nums))]['Score'] >= 0.95:
  265. reply_sort = EmailSortModel.objects.filter(
  266. sort=response1['Classes'][nums.index(max(nums))]['Name']).values('autoreplymodel')
  267. if reply_sort.exists():
  268. reply_model = reply_sort[0]['autoreplymodel'] + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + \
  269. 'At' + email_body[i]['Date'] + from_list[i].split()[0] + \
  270. from_list[i].split()[1] + \
  271. 'wrote:\n ' + bodydata_list[i]
  272. print(reply_model)
  273. shoujianren = from_list[i].split()[1].replace('<', '').replace('>', '')
  274. a = NeteaseMail().sentemail(None, uid_list[i], reply_model, None, subject_list[i], ec.fromaddr, shoujianren,
  275. ec.fromaddr, ec.password)
  276. email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
  277. if a == 'fail':
  278. print("此邮件被判定为垃圾邮件,不进行回复")
  279. email_server.remove_flags(uid_list[i], b'\\Seen', silent=False)
  280. else:
  281. replyemailsum += a
  282. else:
  283. print("无此转发分类,不进行回复")
  284. else:
  285. reply_model = bodydata_list[i]
  286. shoujianren = 'liehaoquan2021@163.com'
  287. fajianren = from_list[i].split()[1]
  288. NeteaseMail().sentemail(reply_model, None, subject_list[i], fajianren, shoujianren,
  289. ec.fromaddr, ec.password)
  290. email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
  291. print("命中率低于0.95,已转发到zendesk")
  292. # es_qs = EmailSortModel.objects.filter(sort=response1['Classes'][nums.index(max(nums))]['Name'])
  293. # if es_qs.exists():
  294. # uemail_qs = EmailConfigModel.objects.filter(userid=es_qs[0]['userid'], langconfig__langcode='en')
  295. # if uemail_qs.exists():
  296. # for uem in uemail_qs:
  297. # NeteaseMail().sentemail(bodydata_list[i], subject_list[i], from_list[i], uem.fromaddr, ec.fromaddr, ec.password)
  298. NeteaseMail().closeEmail(email_server)
  299. emaildict = {
  300. "邮箱内获取到未读邮件数": emailsum,
  301. "已自动回复的邮件数": replyemailsum
  302. }
  303. return response.json(0, emaildict)
  304. def email(self, request_dict, response):
  305. from imapclient import IMAPClient
  306. import email
  307. import time
  308. import email.parser
  309. from nntplib import decode_header
  310. from email.header import Header
  311. import chardet
  312. ec_qs = EmailConfigModel.objects.filter(emailtag=1) # 获取总邮件
  313. for ec in ec_qs:
  314. global a, email_server
  315. #邮件登录
  316. try:
  317. email_server = IMAPClient(ec.emailserver, ssl=True, port=ec.emailserverport)
  318. # email_server = imaplib.IMAP4_SSL(IMAP_SERVER, 993) # 网易企业邮箱服务器及SSL端口
  319. print("imap4 服务器连接成功")
  320. email_server.login(ec.fromaddr, ec.password)
  321. email_server.id_({"name": "IMAPClient", "version": "2.1.0"})
  322. # email_server.login(FROM_ADDR, PASSWORD)
  323. print("imap4 (%s)账号密码正确,登录成功" % ec.fromaddr)
  324. except:
  325. print("imap4 服务器连接失败")
  326. email_server.select_folder('INBOX')
  327. results = email_server.search('UNSEEN') # 读取未读邮件
  328. print("邮箱内获取到未读邮件:", len(results))
  329. subject_list = []
  330. from_list = []
  331. uid_list = []
  332. email_body = []
  333. fujian_list = []
  334. image_list = []
  335. plain_list = []
  336. html_list = []
  337. a = 0
  338. # 对每一封邮件进行内容解析
  339. for uid in results:
  340. msgdict = email_server.fetch(uid, ['Body[]', 'ENVELOPE'], '(RFC822)')
  341. mailbody = msgdict[uid][b'BODY[]']
  342. envelope = msgdict[uid][b'ENVELOPE']
  343. message = email.message_from_bytes(mailbody)
  344. text = message.as_string()
  345. body = email.parser.Parser().parsestr(text)
  346. nowtime = time.asctime(time.localtime(time.time()))
  347. nowmonth = nowtime.split()[1]
  348. nowday = nowtime.split()[2]
  349. emailmonth = body['Date'].split()[2]
  350. emailday = body['Date'].split()[1]
  351. print('email',emailmonth,emailday)
  352. print('now', nowmonth, nowday)
  353. # if emailmonth != nowmonth or emailday != nowday:
  354. # email_server.remove_flags(uid, b'\\Seen', silent=False)
  355. # continue
  356. email_body.append(body)
  357. subject_list.append(decode_header(body['Subject']))
  358. from_list.append(decode_header(body['from']))
  359. uid_list.append(uid)
  360. print("标题:", subject_list)
  361. attlist = {}
  362. try:
  363. for part in body.walk():
  364. if not part.is_multipart():
  365. file = part.get_filename() # 附件名
  366. if file:
  367. a = 1
  368. filename = email.header.decode_header(file)[0][0] # 附件名
  369. charset = email.header.decode_header(file)[0][1] # 编码
  370. if part.get_all("Content-ID"):
  371. content_id = part.get_all("Content-ID")[0][1:-1]
  372. else:
  373. content_id = "" # 附件ID,也就是邮件源码里面的cid
  374. ''' 多个附件时将附件名和ID对应保存到dict里面,后面将正文中的cid替换为本地保存路径 '''
  375. attlist[content_id] = filename
  376. ''' 附件文件名为中文或有编码的时候要进行转码 '''
  377. if str(charset) != "None":
  378. filename = filename.decode(charset)
  379. filedata = part.get_payload(decode=True) # 附件内容
  380. ''' 把附件写到文件里面,附件一定要用wb打开,二进制 '''
  381. file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
  382. image_list.append(file_path + filename)
  383. with open(file_path + filename, "wb") as fw:
  384. fw.write(filedata)
  385. fw.close()
  386. elif part.get_content_type() == 'text/plain':
  387. con3 = part.get_payload(decode=True).strip()
  388. print('plain:', chardet.detect(con3))
  389. if chardet.detect(con3)['encoding'] == 'utf-8':
  390. plain_list.append(con3.decode('utf-8'))
  391. continue
  392. # if chardet.detect(con3)['encoding'] == 'ISO-8859-1':
  393. # plain_list.append(con3.decode('ISO-8859-1')) # .decode('gbk').encode('utf8')
  394. # continue
  395. else:
  396. plain_list.append(con3.decode('gbk'))
  397. continue
  398. elif part.get_content_type() == 'text/html':
  399. con3 = part.get_payload(decode=True).strip()
  400. print('html', chardet.detect(con3))
  401. if chardet.detect(con3)['encoding'] == 'utf-8':
  402. html_list.append(con3.decode('utf-8'))
  403. continue
  404. # if chardet.detect(con3)['encoding'] == 'ISO-8859-1':
  405. # html_list.append(con3.decode('ISO-8859-1'))
  406. # continue
  407. else:
  408. html_list.append(con3.decode('gbk'))
  409. continue
  410. except Exception as e:
  411. print(e)
  412. print('获取邮件内容失败')
  413. email_server.remove_flags(uid, b'\\Seen', silent=False)
  414. subject_list.clear()
  415. from_list.clear()
  416. uid_list.clear()
  417. email_body.clear()
  418. fujian_list.clear()
  419. image_list.clear()
  420. plain_list.clear()
  421. html_list.clear()
  422. print('-------------------------------------------------------------------------')
  423. continue
  424. if a == 1:
  425. fujian_list.append('have')
  426. a = 0
  427. else:
  428. fujian_list.append('none')
  429. email_server.remove_flags(uid, b'\\Seen', silent=False)
  430. print("发件人:", from_list)
  431. print("邮件内容:")
  432. print("plain形式:",plain_list)
  433. print("html形式:",html_list)
  434. print("邮件uid:", uid_list)
  435. print("邮件是否含有图片或附件:", fujian_list)
  436. print('图片或附件名字:', image_list)
  437. if not html_list and not plain_list:
  438. print("邮箱内无未读邮件")
  439. else:
  440. print('成功拿到邮件数据')
  441. # 对邮件类型进行判断处理
  442. for i, v in enumerate(uid_list):
  443. print("邮件标题:", subject_list[i])
  444. if html_list and not plain_list:
  445. print('此邮件仅含有html')
  446. reply_model = html_list[i]
  447. fajianren = from_list[i]
  448. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  449. NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, subject_list[i], fajianren, shoujianren,
  450. ec.fromaddr, ec.password)
  451. print('-------------------------------------------------------------------------')
  452. break
  453. if plain_list[i] == '':
  454. print('此邮件无正文')
  455. reply_model = ''
  456. fajianren = from_list[i]
  457. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  458. NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, subject_list[i], fajianren, shoujianren,
  459. ec.fromaddr, ec.password)
  460. print('-------------------------------------------------------------------------')
  461. break
  462. if not subject_list[i]:
  463. print('此邮件无主题')
  464. if html_list and not plain_list:
  465. print('此邮件无主题且仅含有html')
  466. reply_model = html_list[i]
  467. fajianren = from_list[i]
  468. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  469. biaoti = '(无主题)'
  470. NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, biaoti, fajianren, shoujianren,
  471. ec.fromaddr, ec.password)
  472. print('-------------------------------------------------------------------------')
  473. break
  474. elif plain_list:
  475. print('此邮件无主题且含有正文')
  476. reply_model = plain_list[i]
  477. fajianren = from_list[i]
  478. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  479. biaoti = '(无主题)'
  480. NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, biaoti, fajianren, shoujianren,
  481. ec.fromaddr, ec.password)
  482. print('-------------------------------------------------------------------------')
  483. break
  484. # 处理含有图片或附件的邮件
  485. if fujian_list[i] == 'have':
  486. print('此邮件含有图片或附件或视频或压缩包')
  487. reply_model = html_list[i]
  488. image = image_list
  489. fajianren = from_list[i]
  490. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  491. NeteaseMail().sentemail(email_server, uid_list[i], reply_model, image, subject_list[i], fajianren, shoujianren,
  492. ec.fromaddr, ec.password)
  493. file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
  494. shutil.rmtree(file_path)
  495. os.mkdir(file_path)
  496. print('-------------------------------------------------------------------------')
  497. break
  498. if fujian_list[i] == 'none':
  499. # 处理无图片或附件的邮件
  500. # 进行种类判断
  501. print("识别内容:", plain_list[i].split('\n\n')[0]) # plain
  502. if len(plain_list[i].split('\n\n\n')[0]) < 1000:
  503. response1 = NeteaseMail().aws_login().classify_document(
  504. Text=plain_list[i].split('\n\n\n')[0],
  505. EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/lhq'
  506. # EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/test'
  507. )
  508. # print(response.index(max(response['Classes']['Score'])))
  509. nums = []
  510. print('emailsort:')
  511. for cls in response1['Classes']:
  512. print(cls['Name'])
  513. nums.append(cls['Score'])
  514. print("识别种类及命中率:", response1['Classes'][nums.index(max(nums))])
  515. # 命中率高于0.95时
  516. if response1['Classes'][nums.index(max(nums))]['Score'] >= 0.95:
  517. reply_sort = EmailSortModel.objects.filter(
  518. sort=response1['Classes'][nums.index(max(nums))]['Name']).values('autoreplymodel')
  519. if reply_sort.exists():
  520. reply_model = reply_sort[0]['autoreplymodel'] + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + \
  521. 'At' + email_body[i]['Date'] + str(from_list[i].split()[0:-1]) + \
  522. from_list[i].split()[-1] + \
  523. 'wrote:\n ' + plain_list[i]
  524. print(reply_model)
  525. shoujianren = from_list[i].split()[1].replace('<', '').replace('>', '')
  526. NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i], ec.fromaddr, shoujianren,
  527. ec.fromaddr, ec.password)
  528. print('-------------------------------------------------------------------------')
  529. break
  530. else:
  531. print("此邮件命中率低于0.95")
  532. reply_model = html_list[i] # html
  533. fajianren = from_list[i]
  534. print(fujian_list)
  535. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  536. NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i],
  537. fajianren, shoujianren,
  538. ec.fromaddr, ec.password)
  539. print('-------------------------------------------------------------------------')
  540. break
  541. else:
  542. print("识别内容过长")
  543. reply_model = html_list[i] # html
  544. fajianren = from_list[i]
  545. print(fujian_list)
  546. shoujianren = 'liehaoquan2021@163.com' # 写zendesk邮箱
  547. NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i], fajianren, shoujianren,
  548. ec.fromaddr, ec.password)
  549. print('-------------------------------------------------------------------------')
  550. break
  551. subject_list.clear()
  552. from_list.clear()
  553. uid_list.clear()
  554. email_body.clear()
  555. fujian_list.clear()
  556. image_list.clear()
  557. plain_list.clear()
  558. html_list.clear()
  559. NeteaseMail().closeEmail(email_server)
  560. return response.json(0)
# if __name__ == '__main__':
#     ComprehendAction().get_document_classifier()