"""Django view exposing Amazon Comprehend classifier management and mailbox
auto-reply operations, selected by an ``operation`` URL keyword argument."""
import csv
import imaplib
import os
import shutil
import smtplib
import time
from email import message_from_bytes
from email.header import decode_header
from email.mime.multipart import MIMEMultipart
from email.mime.text import MIMEText
from email.parser import Parser
from email.utils import parseaddr

import boto3
from boto3 import Session
from django.views import View
from django.views.generic import TemplateView

from model.models import EmailSortModel, EmailConfigModel
from object.ResponseObject import ResponseObject
from Utils.NeteaseMail import NeteaseMail

# Locations of the training CSV and the scratch directory for attachments.
_TRAIN_DIR = os.path.dirname(__file__) + '/../emailtrainfile/'
_TRAIN_CSV = _TRAIN_DIR + 'emailtrain.csv'
_ATTACHMENT_DIR = _TRAIN_DIR + 'image/'

# Mailbox that receives every message which is not answered automatically
# (the original comments and log messages call it the zendesk mailbox).
ZENDESK_FORWARD_ADDR = 'liehaoquan2021@163.com'

# Name shared by the classifier and its endpoint.
CLASSIFIER_NAME = 'lhq'
CLASSIFIER_ENDPOINT_ARN = (
    'arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/lhq'
)

# A classification is trusted for auto-reply only at or above this score.
MIN_AUTOREPLY_SCORE = 0.95

# Texts of this length or longer are forwarded instead of being classified.
MAX_CLASSIFY_CHARS = 1000

# Each training row is written this many times.
# NOTE(review): presumably to reach a minimum number of training documents
# per class -- confirm against the training requirements.
_ROWS_PER_SAMPLE = 50

# Blank lines inserted between the canned reply and the quoted original mail.
_REPLY_QUOTE_GAP = '\n' * 6

_SEPARATOR = '-------------------------------------------------------------------------'
_NO_SUBJECT = '(无主题)'
_SEEN = b'\\Seen'


def _decode_mime_header(value):
    """Decode an RFC 2047 encoded header value into plain text.

    Mirrors what ``nntplib.decode_header`` did (that module was removed in
    Python 3.13), and additionally returns ``''`` when the header is missing
    and tolerates unknown or wrong charsets instead of raising.
    """
    if value is None:
        return ''
    parts = []
    for chunk, charset in decode_header(value):
        if isinstance(chunk, bytes):
            try:
                parts.append(chunk.decode(charset or 'ascii', errors='replace'))
            except LookupError:
                # Charset name unknown to Python: fall back to UTF-8.
                parts.append(chunk.decode('utf-8', errors='replace'))
        else:
            parts.append(chunk)
    return ''.join(parts)


class ComprehendView(TemplateView):
    """Dispatch GET/POST requests to the handler named by ``operation``."""

    # URL operation name -> name of the handler method on this class.
    _OPERATIONS = {
        'createSort': 'createSort',
        'doStartSortWord': 'doStartSortWord',
        'write_csv': 'write_csv',
        'upload_csv': 'upload_csv',
        'create_document_classifier': 'create_document_classifier',
        'create_endpoint': 'create_endpoint',
        'describe_document_classifier': 'describe_document_classifier',
        'describe_endpoint': 'describe_endpoint',
        'delete_endpoint': 'delete_endpoint',
        'delete_document_classifier': 'delete_document_classifier',
        'getDocumentClassifier': 'get_document_classifier',
        'email': 'email',
    }

    def get(self, request, *args, **kwargs):
        """Handle GET by dispatching on the ``operation`` URL kwarg."""
        request.encoding = 'utf-8'
        operation = kwargs.get('operation')
        return self.validation(request.GET, request, operation)

    def post(self, request, *args, **kwargs):
        """Handle POST by dispatching on the ``operation`` URL kwarg."""
        request.encoding = 'utf-8'
        operation = kwargs.get('operation')
        return self.validation(request.POST, request, operation)

    def validation(self, request_dict, request, operation):
        """Route ``operation`` to its handler.

        Returns the handler's response, or a 444 'error path' response when
        the operation is missing or unknown (previously an unknown operation
        returned ``None``, which is not a valid view result).
        """
        response = ResponseObject()
        handler_name = self._OPERATIONS.get(operation)
        if handler_name is None:
            return response.json(444, 'error path')
        return getattr(self, handler_name)(request_dict, response)

    # ------------------------------------------------------------------
    # Sample Comprehend calls (all identifiers below are placeholders)
    # ------------------------------------------------------------------

    def createSort(self, request_dict, response):
        """Create a document classifier, then describe it and list all classifiers.

        Uses placeholder region, bucket and role values as found in the
        original code; results are only printed.
        """
        client = boto3.client('comprehend', region_name='region')

        create_response = client.create_document_classifier(
            InputDataConfig={
                'S3Uri': 's3://S3Bucket/docclass/file name'
            },
            DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
            DocumentClassifierName='SampleCodeClassifier1',
            LanguageCode='en'
        )
        print("Create response: %s\n" % (create_response,))

        # Check the status of the classifier.
        describe_response = client.describe_document_classifier(
            DocumentClassifierArn=create_response['DocumentClassifierArn'])
        print("Describe response: %s\n" % (describe_response,))

        # List all classifiers in the account.
        list_response = client.list_document_classifiers()
        print("List response: %s\n" % (list_response,))
        return response.json(0)

    def doStartSortWord(self, request_dict, response):
        """Start a document classification job, then describe it and list all jobs.

        Uses placeholder bucket, role and classifier values as found in the
        original code; results are only printed.
        """
        client = boto3.client('comprehend', region_name='region')

        start_response = client.start_document_classification_job(
            InputDataConfig={
                'S3Uri': 's3://srikad-us-west-2-input/docclass/file name',
                'InputFormat': 'ONE_DOC_PER_LINE'
            },
            OutputDataConfig={
                'S3Uri': 's3://S3Bucket/output'
            },
            DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
            DocumentClassifierArn=
            'arn:aws:comprehend:region:account number:document-classifier/SampleCodeClassifier1'
        )
        print("Start response: %s\n" % (start_response,))

        # Check the status of the job.
        describe_response = client.describe_document_classification_job(
            JobId=start_response['JobId'])
        print("Describe response: %s\n" % (describe_response,))

        # List all classification jobs in the account.
        list_response = client.list_document_classification_jobs()
        print("List response: %s\n" % (list_response,))
        return response.json(0)

    # ------------------------------------------------------------------
    # Training data
    # ------------------------------------------------------------------

    def write_csv(self, request_dict, response):
        """Write the training CSV from ``EmailSortModel`` rows.

        Every (sort, clientquestion) pair is written ``_ROWS_PER_SAMPLE``
        times. Any existing file is overwritten.
        """
        with open(_TRAIN_CSV, "w", newline='') as f:
            csv_writer = csv.writer(f)
            for row in EmailSortModel.objects.values('sort', 'clientquestion'):
                text = [row['sort'], row['clientquestion']]
                for _ in range(_ROWS_PER_SAMPLE):
                    csv_writer.writerow(text)
        return response.json(0)

    def upload_csv(self, request_dict, response):
        """Upload the training CSV to the ``asj-amazon-comprehend`` bucket."""
        bucket_name = "asj-amazon-comprehend"
        upload_key = "location/emailtrain.csv"
        with open(_TRAIN_CSV, "rb") as f:
            NeteaseMail().s3_login().upload_fileobj(f, bucket_name, Key=upload_key)
        return response.json(0)

    # ------------------------------------------------------------------
    # Classifier / endpoint lifecycle
    # ------------------------------------------------------------------

    def create_document_classifier(self, request_dict, response):
        """Start training the ``lhq`` multi-class classifier from the uploaded CSV.

        Returns code 0 on success, or 404 with a message when the call fails.
        """
        try:
            response1 = NeteaseMail().aws_login().create_document_classifier(
                DocumentClassifierName=CLASSIFIER_NAME,
                DataAccessRoleArn='arn:aws:iam::697864307463:role/service-role/AmazonComprehendServiceRoleS3FullAccess-admin',
                InputDataConfig={
                    'DataFormat': 'COMPREHEND_CSV',
                    'S3Uri': 's3://asj-amazon-comprehend/location/emailtrain.csv',
                },
                OutputDataConfig={
                    'S3Uri': 's3://asj-amazon-comprehend/output/lhq/'},
                LanguageCode='en',
                Mode='MULTI_CLASS'
            )
        except Exception as e:
            # Keep the original user-facing message, but no longer hide the cause.
            print(e)
            return response.json(404, repr('模型正在训练或已存在'))
        else:
            print(response1)
            return response.json(0)

    def create_endpoint(self, request_dict, response):
        """Create the ``lhq`` endpoint for the trained classifier.

        Returns code 0 on success, or 404 with a message when the call fails.
        """
        try:
            response1 = NeteaseMail().aws_login().create_endpoint(
                EndpointName=CLASSIFIER_NAME,
                # ARN of the document classifier served by this endpoint.
                ModelArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier/lhq',
                DesiredInferenceUnits=1,
            )
        except Exception as e:
            print(e)
            return response.json(404, repr('模型正在训练或不存在,请等待模型训练完再创建端点'))
        else:
            print(response1)
            return response.json(0)

    def describe_document_classifier(self, request_dict, response):
        """Return ``{"Status": ...}`` for the ``lhq`` classifier, or 404 on failure."""
        try:
            # aws_arn()[0] is used as the classifier ARN prefix.
            classifier_arn = NeteaseMail().aws_arn()[0] + CLASSIFIER_NAME
            response1 = NeteaseMail().aws_login().describe_document_classifier(
                DocumentClassifierArn=classifier_arn
            )
            status = {"Status": response1['DocumentClassifierProperties']['Status']}
        except Exception as e:
            print(e)
            return response.json(404, repr('模型不存在'))
        else:
            return response.json(0, status)

    def describe_endpoint(self, request_dict, response):
        """Return ``{"Status": ...}`` for the ``lhq`` endpoint, or 404 on failure."""
        try:
            # aws_arn()[1] is used as the endpoint ARN prefix.
            endpoint_arn = NeteaseMail().aws_arn()[1] + CLASSIFIER_NAME
            response1 = NeteaseMail().aws_login().describe_endpoint(
                EndpointArn=endpoint_arn
            )
            status = {"Status": response1['EndpointProperties']['Status']}
        except Exception as e:
            print(e)
            return response.json(404, repr('端点不存在'))
        else:
            return response.json(0, status)

    def delete_endpoint(self, request_dict, response):
        """Delete the ``lhq`` endpoint; 404 with a message when the call fails."""
        try:
            endpoint_arn = NeteaseMail().aws_arn()[1] + CLASSIFIER_NAME
            NeteaseMail().aws_login().delete_endpoint(EndpointArn=endpoint_arn)
        except Exception as e:
            print(e)
            return response.json(404, repr('端点不存在或正在删除中'))
        else:
            return response.json(0)

    def delete_document_classifier(self, request_dict, response):
        """Delete the ``lhq`` classifier; 404 with a message when the call fails."""
        try:
            classifier_arn = NeteaseMail().aws_arn()[0] + CLASSIFIER_NAME
            NeteaseMail().aws_login().delete_document_classifier(
                DocumentClassifierArn=classifier_arn)
        except Exception as e:
            print(e)
            return response.json(404, repr('模型不存在或正在删除中'))
        else:
            return response.json(0)

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------

    @staticmethod
    def _classify(text):
        """Classify ``text`` on the ``lhq`` endpoint and return the top class.

        Prints every candidate class name and the winning entry. The returned
        value is the element of ``Classes`` with the highest ``Score`` (the
        first one on ties, as before).
        """
        result = NeteaseMail().aws_login().classify_document(
            Text=text,
            EndpointArn=CLASSIFIER_ENDPOINT_ARN
        )
        classes = result['Classes']
        print('emailsort:')
        for cls in classes:
            print(cls['Name'])
        best = max(classes, key=lambda cls: cls['Score'])
        print("识别种类及命中率:", best)
        return best

    @staticmethod
    def _forward_to_zendesk(email_server, uid, body, images, subject, sender, ec):
        """Forward one message to ``ZENDESK_FORWARD_ADDR`` via ``sentemail``.

        Argument order follows the nine-argument ``sentemail`` calls used
        throughout this file: server, uid, body, images, subject, from, to,
        login address, login password.
        """
        return NeteaseMail().sentemail(
            email_server, uid, body, images, subject, sender,
            ZENDESK_FORWARD_ADDR, ec.fromaddr, ec.password)

    @staticmethod
    def _reset_attachment_dir():
        """Delete every saved attachment by recreating the scratch directory."""
        shutil.rmtree(_ATTACHMENT_DIR)
        os.mkdir(_ATTACHMENT_DIR)

    # ------------------------------------------------------------------
    # Auto-reply based on NeteaseMail.getEmailContext
    # ------------------------------------------------------------------

    def get_document_classifier(self, request_dict, response):
        """Classify unread mail of every ``emailtag=1`` mailbox and auto-reply.

        Per message: skip when body or subject is empty; forward when it has
        attachments; otherwise classify and either send the stored reply
        (score >= ``MIN_AUTOREPLY_SCORE`` and a reply exists) or forward.

        Returns a response carrying the number of unread messages found and
        the accumulated ``sentemail`` results of the automatic replies.
        """
        ec_qs = EmailConfigModel.objects.filter(emailtag=1)
        emailsum = 0
        replyemailsum = 0
        for ec in ec_qs:
            email_server = NeteaseMail().loginEmail(
                ec.emailserver, ec.fromaddr, ec.password, ec.emailserverport)
            (subject_list, from_list, bodydata_list, uid_list, results,
             email_body, fujian_list, image_list) = NeteaseMail().getEmailContext(email_server)
            emailsum += len(results)
            print("标题:", len(subject_list), subject_list)
            print("发件人:", len(from_list), from_list)
            print("邮件内容:", len(bodydata_list), bodydata_list)
            print("邮件uid:", len(uid_list), uid_list)
            print("附件判断:", len(fujian_list), fujian_list)
            print('图片数量:', len(image_list), image_list)

            if not bodydata_list:
                print("邮箱内无未读邮件")
            else:
                print('成功拿到邮件数据')
                for i, subject in enumerate(subject_list):
                    print(_SEPARATOR)
                    print("邮件标题:", subject)
                    if not bodydata_list[i] or not subject:
                        print("邮件无正文或无标题,不进行回复")
                        continue

                    if fujian_list[i] == 'have':
                        # Messages with images/attachments are never auto-answered.
                        sender = from_list[i].split()[1]
                        self._forward_to_zendesk(
                            None, uid_list[i], bodydata_list[i], image_list,
                            subject, sender, ec)
                        self._reset_attachment_dir()
                        print('邮件含有图片或附件,已转发到zendesk')
                        email_server.set_flags(uid_list[i], _SEEN, silent=False)
                        continue

                    best = self._classify(bodydata_list[i].split('\n\n\n\n\n')[0])
                    if best['Score'] >= MIN_AUTOREPLY_SCORE:
                        reply_sort = EmailSortModel.objects.filter(
                            sort=best['Name']).values('autoreplymodel')
                        if reply_sort.exists():
                            reply_model = (
                                reply_sort[0]['autoreplymodel'] + _REPLY_QUOTE_GAP
                                + 'At' + email_body[i]['Date']
                                + from_list[i].split()[0]
                                + from_list[i].split()[1]
                                + 'wrote:\n ' + bodydata_list[i]
                            )
                            print(reply_model)
                            recipient = from_list[i].split()[1].replace('<', '').replace('>', '')
                            sent = NeteaseMail().sentemail(
                                None, uid_list[i], reply_model, None, subject,
                                ec.fromaddr, recipient, ec.fromaddr, ec.password)
                            email_server.set_flags(uid_list[i], _SEEN, silent=False)
                            if sent == 'fail':
                                print("此邮件被判定为垃圾邮件,不进行回复")
                                # Leave the message unread so it can be retried.
                                email_server.remove_flags(uid_list[i], _SEEN, silent=False)
                            else:
                                replyemailsum += sent
                        else:
                            print("无此转发分类,不进行回复")
                    else:
                        sender = from_list[i].split()[1]
                        # The original call here omitted the leading
                        # (server, uid) arguments that every other sentemail
                        # call in this file passes; aligned with them.
                        self._forward_to_zendesk(
                            None, uid_list[i], bodydata_list[i], None,
                            subject, sender, ec)
                        email_server.set_flags(uid_list[i], _SEEN, silent=False)
                        print("命中率低于0.95,已转发到zendesk")
            NeteaseMail().closeEmail(email_server)

        emaildict = {
            "邮箱内获取到未读邮件数": emailsum,
            "已自动回复的邮件数": replyemailsum
        }
        return response.json(0, emaildict)

    # ------------------------------------------------------------------
    # Auto-reply based on a direct IMAP session
    # ------------------------------------------------------------------

    def email(self, request_dict, response):
        """Process the unread INBOX mail of every ``emailtag=1`` mailbox over IMAP.

        A mailbox whose connection or login fails is skipped (previously the
        code continued with an unbound or stale connection). Always returns
        code 0 once all mailboxes were visited.
        """
        from imapclient import IMAPClient  # third-party; imported lazily as before

        ec_qs = EmailConfigModel.objects.filter(emailtag=1)
        for ec in ec_qs:
            try:
                email_server = IMAPClient(ec.emailserver, ssl=True, port=ec.emailserverport)
                print("imap4 服务器连接成功")
                email_server.login(ec.fromaddr, ec.password)
                email_server.id_({"name": "IMAPClient", "version": "2.1.0"})
                print("imap4 (%s)账号密码正确,登录成功" % ec.fromaddr)
            except Exception as e:
                print("imap4 服务器连接失败")
                print(e)
                continue

            email_server.select_folder('INBOX')
            results = email_server.search('UNSEEN')
            print("邮箱内获取到未读邮件:", len(results))
            for uid in results:
                self._process_unseen_message(email_server, uid, ec)
            NeteaseMail().closeEmail(email_server)
        return response.json(0)

    @staticmethod
    def _decode_text_part(part, label):
        """Return the text of a ``text/*`` MIME part.

        Decodes as UTF-8 when chardet reports ``utf-8`` and as GBK otherwise,
        printing the detection result prefixed by ``label``.
        """
        import chardet  # third-party; imported lazily as before

        raw = part.get_payload(decode=True).strip()
        detected = chardet.detect(raw)
        print(label, detected)
        encoding = 'utf-8' if detected['encoding'] == 'utf-8' else 'gbk'
        return raw.decode(encoding)

    def _extract_parts(self, body):
        """Split a parsed message into plain texts, HTML texts and saved attachments.

        Attachments are written into ``_ATTACHMENT_DIR``. Returns
        ``(plain_parts, html_parts, attachment_paths)``. Any exception is left
        to the caller, which treats it as "message could not be read".
        """
        plain_parts = []
        html_parts = []
        attachment_paths = []
        for part in body.walk():
            if part.is_multipart():
                continue
            raw_name = part.get_filename()
            if raw_name:
                filename = _decode_mime_header(raw_name)
                # The name comes from untrusted mail: keep only its final path
                # component so it cannot escape the attachment directory.
                safe_name = os.path.basename(filename.replace('\\', '/'))
                if not safe_name:
                    raise ValueError('invalid attachment filename: %r' % (filename,))
                target = _ATTACHMENT_DIR + safe_name
                filedata = part.get_payload(decode=True)
                attachment_paths.append(target)
                with open(target, "wb") as fw:
                    fw.write(filedata)
            elif part.get_content_type() == 'text/plain':
                plain_parts.append(self._decode_text_part(part, 'plain:'))
            elif part.get_content_type() == 'text/html':
                html_parts.append(self._decode_text_part(part, 'html'))
        return plain_parts, html_parts, attachment_paths

    def _process_unseen_message(self, email_server, uid, ec):
        """Fetch one unread message, then auto-reply to it or forward it.

        All per-message state is local to this call, so a failure while
        reading one message cannot affect the next one.
        """
        msgdict = email_server.fetch(uid, ['Body[]', 'ENVELOPE'], '(RFC822)')
        raw_message = msgdict[uid][b'BODY[]']
        # Serialise and re-parse, as the original code did.
        body = Parser().parsestr(message_from_bytes(raw_message).as_string())

        # Debug output comparing the mail date with today's date.
        date_header = str(body['Date'] or '')
        date_fields = date_header.split()
        now_fields = time.asctime(time.localtime(time.time())).split()
        email_month = date_fields[2] if len(date_fields) > 2 else ''
        email_day = date_fields[1] if len(date_fields) > 1 else ''
        print('email', email_month, email_day)
        print('now', now_fields[1], now_fields[2])

        subject = _decode_mime_header(body['Subject'])
        sender = _decode_mime_header(body['from'])
        print("标题:", subject)

        try:
            plain_parts, html_parts, attachment_paths = self._extract_parts(body)
        except Exception as e:
            print(e)
            print('获取邮件内容失败')
            email_server.remove_flags(uid, _SEEN, silent=False)
            print(_SEPARATOR)
            return

        attachment_flag = 'have' if attachment_paths else 'none'
        # Remove the \Seen flag again after the fetch so the message stays unread.
        email_server.remove_flags(uid, _SEEN, silent=False)

        print("发件人:", sender)
        print("邮件内容:")
        print("plain形式:", plain_parts)
        print("html形式:", html_parts)
        print("邮件uid:", uid)
        print("邮件是否含有图片或附件:", attachment_flag)
        print('图片或附件名字:', attachment_paths)

        if not html_parts and not plain_parts:
            print("邮箱内无未读邮件")
            return
        print('成功拿到邮件数据')
        print("邮件标题:", subject)

        html_only = bool(html_parts) and not plain_parts
        # Body used when forwarding: the HTML part when present, otherwise
        # the plain text (the original indexed the HTML list unconditionally
        # and failed for plain-text-only messages).
        forward_body = html_parts[0] if html_parts else plain_parts[0]

        # Messages without a subject are forwarded with a placeholder subject.
        # This check runs first so the HTML-only case is reachable.
        if not subject:
            print('此邮件无主题')
            if html_only:
                print('此邮件无主题且仅含有html')
                no_subject_body = html_parts[0]
            else:
                print('此邮件无主题且含有正文')
                no_subject_body = plain_parts[0]
            self._forward_to_zendesk(
                email_server, uid, no_subject_body, None, _NO_SUBJECT, sender, ec)
            print(_SEPARATOR)
            return

        if html_only:
            print('此邮件仅含有html')
            self._forward_to_zendesk(
                email_server, uid, html_parts[0], None, subject, sender, ec)
            print(_SEPARATOR)
            return

        if plain_parts[0] == '':
            print('此邮件无正文')
            self._forward_to_zendesk(email_server, uid, '', None, subject, sender, ec)
            print(_SEPARATOR)
            return

        if attachment_flag == 'have':
            print('此邮件含有图片或附件或视频或压缩包')
            self._forward_to_zendesk(
                email_server, uid, forward_body, attachment_paths, subject, sender, ec)
            self._reset_attachment_dir()
            print(_SEPARATOR)
            return

        # No attachments: try to classify the leading block of the plain text.
        print("识别内容:", plain_parts[0].split('\n\n')[0])
        text = plain_parts[0].split('\n\n\n')[0]
        if len(text) >= MAX_CLASSIFY_CHARS:
            print("识别内容过长")
            print(attachment_flag)
            self._forward_to_zendesk(
                email_server, uid, forward_body, None, subject, sender, ec)
            print(_SEPARATOR)
            return

        best = self._classify(text)
        if best['Score'] < MIN_AUTOREPLY_SCORE:
            print("此邮件命中率低于0.95")
            print(attachment_flag)
            self._forward_to_zendesk(
                email_server, uid, forward_body, None, subject, sender, ec)
            print(_SEPARATOR)
            return

        reply_sort = EmailSortModel.objects.filter(
            sort=best['Name']).values('autoreplymodel')
        if not reply_sort.exists():
            # No stored reply for this category: nothing is sent.
            return

        sender_tokens = sender.split()
        # NOTE(review): the quote header is kept exactly as before, including
        # the missing spaces and the list repr of the display name -- confirm
        # the intended wording before changing it.
        reply_model = (
            reply_sort[0]['autoreplymodel'] + _REPLY_QUOTE_GAP
            + 'At' + date_header + str(sender_tokens[0:-1])
            + (sender_tokens[-1] if sender_tokens else '')
            + 'wrote:\n ' + plain_parts[0]
        )
        print(reply_model)
        # Take the bare address from the From header; the former split()[1]
        # picked the wrong token for multi-word display names.
        recipient = parseaddr(sender)[1]
        NeteaseMail().sentemail(
            email_server, uid, reply_model, None, subject,
            ec.fromaddr, recipient, ec.fromaddr, ec.password)
        print(_SEPARATOR)