|
- import imaplib
- import shutil
- import smtplib
- from email.mime.multipart import MIMEMultipart
- from email.mime.text import MIMEText
- import boto3
- from boto3 import Session
- from django.views import View
- from model.models import EmailSortModel, EmailConfigModel
- from Utils.NeteaseMail import NeteaseMail
- from django.views.generic import TemplateView
- from object.ResponseObject import ResponseObject
- import os
- import csv
# Django class-based view whose get/post handlers read an `operation` name from
# the URL kwargs and hand it to `validation`, which calls the matching handler
# method (Amazon Comprehend classifier/endpoint management and mailbox
# processing).
- class ComprehendView(TemplateView):
def get(self, request, *args, **kwargs):
    """Handle a GET request.

    Forces the request encoding to UTF-8 and forwards the query parameters
    (``request.GET``), the request itself and the ``operation`` URL kwarg
    (``None`` when absent) to ``validation``, returning its result.
    """
    request.encoding = 'utf-8'
    return self.validation(request.GET, request, kwargs.get('operation'))
def post(self, request, *args, **kwargs):
    """Handle a POST request.

    Forces the request encoding to UTF-8 and forwards the form parameters
    (``request.POST``), the request itself and the ``operation`` URL kwarg
    (``None`` when absent) to ``validation``, returning its result.
    """
    request.encoding = 'utf-8'
    return self.validation(request.POST, request, kwargs.get('operation'))
def validation(self, request_dict, request, operation):
    """Route a request to the handler selected by ``operation``.

    Args:
        request_dict: Request parameters (``request.GET`` or ``request.POST``).
        request: The incoming request object; not used by the routing itself.
        operation: Operation name taken from the URL, or ``None``.

    Returns:
        The selected handler's return value, or
        ``response.json(444, 'error path')`` when ``operation`` is missing
        or is not one of the known operation names.
    """
    response = ResponseObject()
    # Operation name (as it appears in the URL) -> handler method.
    handlers = {
        'createSort': self.createSort,
        'doStartSortWord': self.doStartSortWord,
        'write_csv': self.write_csv,
        'upload_csv': self.upload_csv,
        'create_document_classifier': self.create_document_classifier,
        'create_endpoint': self.create_endpoint,
        'describe_document_classifier': self.describe_document_classifier,
        'describe_endpoint': self.describe_endpoint,
        'delete_endpoint': self.delete_endpoint,
        'delete_document_classifier': self.delete_document_classifier,
        'getDocumentClassifier': self.get_document_classifier,
        'email': self.email,
    }
    handler = handlers.get(operation)
    if handler is None:
        # Covers a missing operation and an unknown one. An unknown name used
        # to fall through the if/elif chain and return None, which is not a
        # valid view response.
        return response.json(444, 'error path')
    return handler(request_dict, response)
def createSort(self, request_dict, response):
    """Create a sample Comprehend document classifier and print its state.

    Creates the classifier, describes it, then lists all classifiers,
    printing each API response.

    NOTE(review): the region, S3 URI and role ARN below look like
    documentation placeholders ('region', 'S3Bucket', 'account number');
    confirm real values are supplied before relying on this operation.

    Args:
        request_dict: Request parameters (unused).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` once all three API calls have completed.

    Raises:
        Whatever the boto3 client raises; errors are not caught here.
    """
    client = boto3.client('comprehend', region_name='region')

    # Create a document classifier
    create_response = client.create_document_classifier(
        InputDataConfig={
            'S3Uri': 's3://S3Bucket/docclass/file name'
        },
        DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
        DocumentClassifierName='SampleCodeClassifier1',
        LanguageCode='en'
    )
    # print() does not apply %-formatting to its arguments, so format
    # explicitly (the original printed the literal "%s").
    print("Create response: %s\n" % (create_response,))

    # Check the status of the classifier
    describe_response = client.describe_document_classifier(
        DocumentClassifierArn=create_response['DocumentClassifierArn'])
    print("Describe response: %s\n" % (describe_response,))

    # List all classifiers in account
    list_response = client.list_document_classifiers()
    print("List response: %s\n" % (list_response,))

    # The result is returned straight to the view layer by `validation`, so
    # a response object is required instead of the implicit None.
    return response.json(0)
def doStartSortWord(self, request_dict, response):
    """Start a sample Comprehend document classification job and print its state.

    Starts the job, describes it, then lists all classification jobs,
    printing each API response.

    NOTE(review): the region, S3 URIs and ARNs below look like
    documentation placeholders ('region', 'S3Bucket', 'account number');
    confirm real values are supplied before relying on this operation.

    Args:
        request_dict: Request parameters (unused).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` once all three API calls have completed.

    Raises:
        Whatever the boto3 client raises; errors are not caught here.
    """
    client = boto3.client('comprehend', region_name='region')

    start_response = client.start_document_classification_job(
        InputDataConfig={
            'S3Uri': 's3://srikad-us-west-2-input/docclass/file name',
            'InputFormat': 'ONE_DOC_PER_LINE'
        },
        OutputDataConfig={
            'S3Uri': 's3://S3Bucket/output'
        },
        DataAccessRoleArn='arn:aws:iam::account number:role/resource name',
        DocumentClassifierArn=
        'arn:aws:comprehend:region:account number:document-classifier/SampleCodeClassifier1'
    )
    # print() does not apply %-formatting to its arguments, so format
    # explicitly (the original printed the literal "%s").
    print("Start response: %s\n" % (start_response,))

    # Check the status of the job
    describe_response = client.describe_document_classification_job(JobId=start_response['JobId'])
    print("Describe response: %s\n" % (describe_response,))

    # List all classification jobs in account
    list_response = client.list_document_classification_jobs()
    print("List response: %s\n" % (list_response,))

    # The result is returned straight to the view layer by `validation`, so
    # a response object is required instead of the implicit None.
    return response.json(0)
def write_csv(self, request_dict, response):
    """Write the classifier training CSV from the ``EmailSortModel`` table.

    Each ``(sort, clientquestion)`` pair is written as one CSV row, repeated
    50 times, to ``../emailtrainfile/emailtrain.csv`` relative to this
    module. Any existing file is overwritten.

    Args:
        request_dict: Request parameters (unused).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` after the file has been written.
    """
    # Number of copies written per record.
    # NOTE(review): presumably this inflates the sample count per label for
    # classifier training -- confirm the intent before changing it.
    copies = 50

    file_path = os.path.dirname(__file__) + '/../emailtrainfile/emailtrain.csv'

    # Run the query before the file is truncated, so a database error does
    # not leave an empty training file behind.
    records = list(EmailSortModel.objects.values('sort', 'clientquestion'))

    # Explicit UTF-8: without it the platform default encoding is used, which
    # can fail on (or mis-encode) non-ASCII question text.
    with open(file_path, "w", newline='', encoding='utf-8') as f:
        csv_writer = csv.writer(f)  # one writer for the whole file
        for record in records:
            row = [record['sort'], record['clientquestion']]
            csv_writer.writerows([row] * copies)
    return response.json(0)
def upload_csv(self, request_dict, response):
    """Upload the local training CSV to S3.

    Sends ``../emailtrainfile/emailtrain.csv`` (relative to this module) to
    the ``asj-amazon-comprehend`` bucket under the key
    ``location/emailtrain.csv`` and returns ``response.json(0)``.
    ``request_dict`` is unused.
    """
    local_csv = os.path.dirname(__file__) + '/../emailtrainfile/emailtrain.csv'
    with open(local_csv, "rb") as csv_stream:
        # The uploader is obtained only after the file opened successfully,
        # same as the original ordering.
        uploader = NeteaseMail().s3_login()
        uploader.upload_fileobj(
            csv_stream,
            "asj-amazon-comprehend",
            Key="location/emailtrain.csv",
        )
    return response.json(0)
def create_document_classifier(self, request_dict, response):
    """Request creation of the ``lhq`` multi-class document classifier.

    The classifier is trained from
    ``s3://asj-amazon-comprehend/location/emailtrain.csv`` (Comprehend CSV
    format, English) and writes its output under
    ``s3://asj-amazon-comprehend/output/lhq/``.

    Args:
        request_dict: Request parameters (unused).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` on success. On any exception, a 404 response
        whose message says the model is training or already exists.
    """
    try:
        result = NeteaseMail().aws_login().create_document_classifier(
            DocumentClassifierName='lhq',
            DataAccessRoleArn='arn:aws:iam::697864307463:role/service-role/AmazonComprehendServiceRoleS3FullAccess-admin',
            InputDataConfig={
                'DataFormat': 'COMPREHEND_CSV',
                'S3Uri': 's3://asj-amazon-comprehend/location/emailtrain.csv',
            },
            OutputDataConfig={
                'S3Uri': 's3://asj-amazon-comprehend/output/lhq/'},
            LanguageCode='en',
            Mode='MULTI_CLASS'  # the alternative mode is 'MULTI_LABEL'
        )
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause (credentials, permissions,
        # throttling, ...) instead of discarding it.
        print('create_document_classifier failed: %r' % (e,))
        # NOTE(review): repr() of a str literal adds quote characters to the
        # message; kept unchanged because clients may rely on it.
        return response.json(404, repr('模型正在训练或已存在'))
    print(result)
    return response.json(0)
def create_endpoint(self, request_dict, response):
    """Request creation of the ``lhq`` endpoint for the ``lhq`` classifier.

    The endpoint is created with one inference unit.

    Args:
        request_dict: Request parameters (unused).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` on success. On any exception, a 404 response
        whose message says the model is still training or does not exist.
    """
    try:
        result = NeteaseMail().aws_login().create_endpoint(
            EndpointName='lhq',
            # ARN of the document classifier the endpoint serves.
            ModelArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier/lhq',
            DesiredInferenceUnits=1,
        )
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause instead of discarding it.
        print('create_endpoint failed: %r' % (e,))
        # NOTE(review): repr() of a str literal adds quote characters to the
        # message; kept unchanged because clients may rely on it.
        return response.json(404, repr('模型正在训练或不存在,请等待模型训练完再创建端点'))
    print(result)
    return response.json(0)
def describe_document_classifier(self, request_dict, response):
    """Report the status of the ``lhq`` document classifier.

    Args:
        request_dict: Request parameters (unused; the classifier name is
            hard-coded to ``lhq``).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0, {"Status": <status>})`` on success. On any
        exception, a 404 response whose message says the model does not
        exist.
    """
    try:
        # aws_arn()[0] is used as the prefix the classifier name is appended
        # to -- presumably the classifier ARN prefix; verify in NeteaseMail.
        classifier_arn = NeteaseMail().aws_arn()[0] + 'lhq'
        description = NeteaseMail().aws_login().describe_document_classifier(
            DocumentClassifierArn=classifier_arn
        )
        status = {"Status": description['DocumentClassifierProperties']['Status']}
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause instead of discarding it.
        print('describe_document_classifier failed: %r' % (e,))
        return response.json(404, repr('模型不存在'))
    return response.json(0, status)
def describe_endpoint(self, request_dict, response):
    """Report the status of the ``lhq`` endpoint.

    Args:
        request_dict: Request parameters (unused; the endpoint name is
            hard-coded to ``lhq``).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0, {"Status": <status>})`` on success. On any
        exception, a 404 response whose message says the endpoint does not
        exist.
    """
    try:
        # aws_arn()[1] is used as the prefix the endpoint name is appended
        # to -- presumably the endpoint ARN prefix; verify in NeteaseMail.
        endpoint_arn = NeteaseMail().aws_arn()[1] + 'lhq'
        description = NeteaseMail().aws_login().describe_endpoint(
            EndpointArn=endpoint_arn
        )
        status = {"Status": description['EndpointProperties']['Status']}
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause instead of discarding it.
        print('describe_endpoint failed: %r' % (e,))
        return response.json(404, repr('端点不存在'))
    return response.json(0, status)
def delete_endpoint(self, request_dict, response):
    """Request deletion of the ``lhq`` endpoint.

    Args:
        request_dict: Request parameters (unused; the endpoint name is
            hard-coded to ``lhq``).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` on success. On any exception, a 404 response
        whose message says the endpoint does not exist or is being deleted.
    """
    try:
        # aws_arn()[1] is used as the prefix the endpoint name is appended
        # to -- presumably the endpoint ARN prefix; verify in NeteaseMail.
        endpoint_arn = NeteaseMail().aws_arn()[1] + 'lhq'
        NeteaseMail().aws_login().delete_endpoint(EndpointArn=endpoint_arn)
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause instead of discarding it.
        print('delete_endpoint failed: %r' % (e,))
        return response.json(404, repr('端点不存在或正在删除中'))
    return response.json(0)
def delete_document_classifier(self, request_dict, response):
    """Request deletion of the ``lhq`` document classifier.

    Args:
        request_dict: Request parameters (unused; the classifier name is
            hard-coded to ``lhq``).
        response: Response builder used to produce the return value.

    Returns:
        ``response.json(0)`` on success. On any exception, a 404 response
        whose message says the model does not exist or is being deleted.
    """
    try:
        # aws_arn()[0] is used as the prefix the classifier name is appended
        # to -- presumably the classifier ARN prefix; verify in NeteaseMail.
        classifier_arn = NeteaseMail().aws_arn()[0] + 'lhq'
        NeteaseMail().aws_login().delete_document_classifier(
            DocumentClassifierArn=classifier_arn)
    except Exception as e:
        # Every failure is reported to the client with the same fixed
        # message, so log the real cause instead of discarding it.
        print('delete_document_classifier failed: %r' % (e,))
        return response.json(404, repr('模型不存在或正在删除中'))
    return response.json(0)
# Process the mail fetched for every mailbox configured with emailtag=1:
# classify each message body with the Comprehend endpoint `lhq`, auto-reply
# when the best class scores >= 0.95 and a reply template exists for it, and
# otherwise forward the message to a fixed address (log text calls it Zendesk).
#
# Parameters:
#   request_dict -- request parameters (not read by this method).
#   response     -- response builder; `response.json(0, {...})` is returned
#                   with two counters (messages fetched, auto-replies sent).
#
# NOTE(review): the original indentation is not visible in this file, so the
# nesting described in these comments is inferred from the if/else pairing.
- def get_document_classifier(self, request_dict, response):
- ec_qs = EmailConfigModel.objects.filter(emailtag=1) # mailbox configs with emailtag=1 (original note: "get the main mailbox")
# Counters reported back to the caller at the end.
- emailsum = 0
- replyemailsum = 0
- for ec in ec_qs:
- email_server = NeteaseMail().loginEmail(ec.emailserver, ec.fromaddr, ec.password, ec.emailserverport)
# getEmailContext returns eight values; they are indexed in parallel by `i`
# below -- assumes they all have one entry per message (TODO confirm in
# NeteaseMail).
- subject_list, from_list, bodydata_list, uid_list, results, email_body, fujian_list, image_list = NeteaseMail().getEmailContext(
- email_server)
- emailsum += len(results)
- print("标题:", len(subject_list), subject_list)
- print("发件人:", len(from_list), from_list)
- print("邮件内容:", len(bodydata_list), bodydata_list)
- print("邮件uid:", len(uid_list), uid_list)
- print("附件判断:", len(fujian_list), fujian_list)
- print('图片数量:', len(image_list), image_list)
- if not bodydata_list:
- print("邮箱内无未读邮件")
- else:
- print('成功拿到邮件数据')
- for i, v in enumerate(subject_list):
- print('-------------------------------------------------------------------------')
- print("邮件标题:", subject_list[i])
# Messages without a body or without a subject are skipped (no reply sent).
- if not bodydata_list[i] or not subject_list[i]:
- print("邮件无正文或无标题,不进行回复")
- continue
# Messages flagged as having attachments are forwarded as-is, not classified.
- if fujian_list[i] == 'have':
- reply_model = bodydata_list[i]
- image = image_list
- shoujianren = 'liehaoquan2021@163.com'
# Takes the second whitespace-separated token of the From value -- presumably
# the "<address>" part of "Name <address>"; a bare address has no second
# token and would raise IndexError.
- fajianren = from_list[i].split()[1]
- NeteaseMail().sentemail(None, uid_list[i],reply_model, image, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
# Wipe and recreate the attachment directory after forwarding.
- file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
- shutil.rmtree(file_path)
- os.mkdir(file_path)
- print('邮件含有图片或附件,已转发到zendesk')
- email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
- continue
# Only the text before the first run of five newlines is classified --
# presumably to drop quoted history; verify against real message bodies.
- response1 = NeteaseMail().aws_login().classify_document(
- Text=bodydata_list[i].split('\n\n\n\n\n')[0],
- EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/lhq'
- # EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/test'
- )
- # print(response.index(max(response['Classes']['Score'])))
# Collect the scores so the highest-scoring class can be picked by index.
- nums = []
- print('emailsort:')
- for cls in response1['Classes']:
- print(cls['Name'])
- nums.append(cls['Score'])
- print("识别种类及命中率:", response1['Classes'][nums.index(max(nums))])
# Auto-reply only when the best class reaches the 0.95 score threshold.
- if response1['Classes'][nums.index(max(nums))]['Score'] >= 0.95:
- reply_sort = EmailSortModel.objects.filter(
- sort=response1['Classes'][nums.index(max(nums))]['Name']).values('autoreplymodel')
- if reply_sort.exists():
# Reply text = stored template, six newlines, then a quoted copy of the
# original message (date, sender, body).
- reply_model = reply_sort[0]['autoreplymodel'] + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + \
- 'At' + email_body[i]['Date'] + from_list[i].split()[0] + \
- from_list[i].split()[1] + \
- 'wrote:\n ' + bodydata_list[i]
- print(reply_model)
- shoujianren = from_list[i].split()[1].replace('<', '').replace('>', '')
- a = NeteaseMail().sentemail(None, uid_list[i], reply_model, None, subject_list[i], ec.fromaddr, shoujianren,
- ec.fromaddr, ec.password)
- email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
# sentemail's result is compared with 'fail' and otherwise added to a counter
# -- assumes it returns a number on success (TODO confirm in NeteaseMail).
- if a == 'fail':
- print("此邮件被判定为垃圾邮件,不进行回复")
- email_server.remove_flags(uid_list[i], b'\\Seen', silent=False)
- else:
- replyemailsum += a
- else:
- print("无此转发分类,不进行回复")
- else:
# Below the threshold: forward the original body instead of replying.
- reply_model = bodydata_list[i]
- shoujianren = 'liehaoquan2021@163.com'
- fajianren = from_list[i].split()[1]
# NOTE(review): this call passes 7 positional arguments while every other
# sentemail call in this method passes 9 (the leading `None, uid_list[i]`
# pair is absent here) -- check against NeteaseMail.sentemail's signature.
- NeteaseMail().sentemail(reply_model, None, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
- email_server.set_flags(uid_list[i], b'\\Seen', silent=False)
- print("命中率低于0.95,已转发到zendesk")
- # es_qs = EmailSortModel.objects.filter(sort=response1['Classes'][nums.index(max(nums))]['Name'])
- # if es_qs.exists():
- # uemail_qs = EmailConfigModel.objects.filter(userid=es_qs[0]['userid'], langconfig__langcode='en')
- # if uemail_qs.exists():
- # for uem in uemail_qs:
- # NeteaseMail().sentemail(bodydata_list[i], subject_list[i], from_list[i], uem.fromaddr, ec.fromaddr, ec.password)
- NeteaseMail().closeEmail(email_server)
# Summary returned to the caller: messages fetched and auto-replies sent.
- emaildict = {
- "邮箱内获取到未读邮件数": emailsum,
- "已自动回复的邮件数": replyemailsum
- }
- return response.json(0, emaildict)
# Log in over IMAP to every mailbox configured with emailtag=1, read the
# UNSEEN messages, parse each one (subject, sender, text/plain and text/html
# parts, attachments saved to ../emailtrainfile/image/), then either auto-reply
# using the Comprehend endpoint `lhq` or forward the message to a fixed address
# (comments call it the Zendesk mailbox).
#
# Parameters:
#   request_dict -- request parameters (not read by this method).
#   response     -- response builder; `response.json(0)` is returned.
#
# NOTE(review): the original indentation is not visible in this file, so the
# loop/branch nesting cannot be confirmed from here; the comments below only
# describe individual statements.
- def email(self, request_dict, response):
- from imapclient import IMAPClient
- import email
- import time
- import email.parser
# NOTE(review): nntplib is deprecated since Python 3.11 and removed in 3.13
# (PEP 594); email.header.decode_header, already used further down for
# attachment names, is the usual helper -- confirm the two behave the same
# for these headers before switching.
- from nntplib import decode_header
- from email.header import Header
- import chardet
- ec_qs = EmailConfigModel.objects.filter(emailtag=1) # mailbox configs with emailtag=1 (original note: "get the main mailbox")
- for ec in ec_qs:
# NOTE(review): `a` and `email_server` are declared module-level globals here,
# so their values are shared outside this call.
- global a, email_server
- # log in to the mailbox
- try:
- email_server = IMAPClient(ec.emailserver, ssl=True, port=ec.emailserverport)
- # email_server = imaplib.IMAP4_SSL(IMAP_SERVER, 993) # NetEase enterprise mail server and SSL port
- print("imap4 服务器连接成功")
- email_server.login(ec.fromaddr, ec.password)
- email_server.id_({"name": "IMAPClient", "version": "2.1.0"})
- # email_server.login(FROM_ADDR, PASSWORD)
- print("imap4 (%s)账号密码正确,登录成功" % ec.fromaddr)
# NOTE(review): bare except -- a failed connection/login is only printed and
# execution continues to use `email_server` below.
- except:
- print("imap4 服务器连接失败")
- email_server.select_folder('INBOX')
- results = email_server.search('UNSEEN') # fetch unread messages
- print("邮箱内获取到未读邮件:", len(results))
# Per-message accumulators; they are indexed in parallel by `i` further down.
- subject_list = []
- from_list = []
- uid_list = []
- email_body = []
- fujian_list = []
- image_list = []
- plain_list = []
- html_list = []
# `a` is set to 1 when a part with a file name (attachment) is found.
- a = 0
- # parse the content of each message
- for uid in results:
# NOTE(review): '(RFC822)' is passed as fetch's third positional argument --
# confirm this is what IMAPClient.fetch expects there.
- msgdict = email_server.fetch(uid, ['Body[]', 'ENVELOPE'], '(RFC822)')
- mailbody = msgdict[uid][b'BODY[]']
- envelope = msgdict[uid][b'ENVELOPE']
- message = email.message_from_bytes(mailbody)
- text = message.as_string()
- body = email.parser.Parser().parsestr(text)
# The current date and the message date are only printed; the filter that
# compared them is commented out below.
- nowtime = time.asctime(time.localtime(time.time()))
- nowmonth = nowtime.split()[1]
- nowday = nowtime.split()[2]
- emailmonth = body['Date'].split()[2]
- emailday = body['Date'].split()[1]
- print('email',emailmonth,emailday)
- print('now', nowmonth, nowday)
- # if emailmonth != nowmonth or emailday != nowday:
- # email_server.remove_flags(uid, b'\\Seen', silent=False)
- # continue
- email_body.append(body)
- subject_list.append(decode_header(body['Subject']))
- from_list.append(decode_header(body['from']))
- uid_list.append(uid)
- print("标题:", subject_list)
# Maps attachment Content-ID -> file name; it is filled but not read again in
# this method.
- attlist = {}
- try:
- for part in body.walk():
- if not part.is_multipart():
- file = part.get_filename() # attachment file name
- if file:
- a = 1
- filename = email.header.decode_header(file)[0][0] # attachment file name
- charset = email.header.decode_header(file)[0][1] # charset
- if part.get_all("Content-ID"):
- content_id = part.get_all("Content-ID")[0][1:-1]
- else:
- content_id = "" # attachment ID, i.e. the cid in the raw message source
# (string below, translated) With several attachments, keep each name and ID
# in a dict so the cid references in the body can later be replaced with the
# local save path.
- ''' 多个附件时将附件名和ID对应保存到dict里面,后面将正文中的cid替换为本地保存路径 '''
- attlist[content_id] = filename
# (string below, translated) Decode the attachment file name when it is
# Chinese or otherwise encoded.
- ''' 附件文件名为中文或有编码的时候要进行转码 '''
- if str(charset) != "None":
- filename = filename.decode(charset)
- filedata = part.get_payload(decode=True) # attachment payload
# (string below, translated) Write the attachment to a file; it must be
# opened in binary mode ("wb").
- ''' 把附件写到文件里面,附件一定要用wb打开,二进制 '''
# NOTE(review): the file name comes from the message itself and is appended
# to the directory path unchanged -- consider sanitising it.
- file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
- image_list.append(file_path + filename)
- with open(file_path + filename, "wb") as fw:
- fw.write(filedata)
# NOTE(review): this close() is redundant -- the `with` block already closes
# the file.
- fw.close()
- elif part.get_content_type() == 'text/plain':
- con3 = part.get_payload(decode=True).strip()
- print('plain:', chardet.detect(con3))
# Decode as UTF-8 when chardet reports it, otherwise fall back to GBK; a
# decode error lands in the except branch below.
- if chardet.detect(con3)['encoding'] == 'utf-8':
- plain_list.append(con3.decode('utf-8'))
- continue
- # if chardet.detect(con3)['encoding'] == 'ISO-8859-1':
- # plain_list.append(con3.decode('ISO-8859-1')) # .decode('gbk').encode('utf8')
- # continue
- else:
- plain_list.append(con3.decode('gbk'))
- continue
- elif part.get_content_type() == 'text/html':
- con3 = part.get_payload(decode=True).strip()
- print('html', chardet.detect(con3))
- if chardet.detect(con3)['encoding'] == 'utf-8':
- html_list.append(con3.decode('utf-8'))
- continue
- # if chardet.detect(con3)['encoding'] == 'ISO-8859-1':
- # html_list.append(con3.decode('ISO-8859-1'))
- # continue
- else:
- html_list.append(con3.decode('gbk'))
- continue
# On any parsing error: print it, remove the Seen flag so the message stays
# unread, reset all accumulators and move on.
- except Exception as e:
- print(e)
- print('获取邮件内容失败')
- email_server.remove_flags(uid, b'\\Seen', silent=False)
- subject_list.clear()
- from_list.clear()
- uid_list.clear()
- email_body.clear()
- fujian_list.clear()
- image_list.clear()
- plain_list.clear()
- html_list.clear()
- print('-------------------------------------------------------------------------')
- continue
# Record whether this message had an attachment, then reset the marker.
- if a == 1:
- fujian_list.append('have')
- a = 0
- else:
- fujian_list.append('none')
- email_server.remove_flags(uid, b'\\Seen', silent=False)
- print("发件人:", from_list)
- print("邮件内容:")
- print("plain形式:",plain_list)
- print("html形式:",html_list)
- print("邮件uid:", uid_list)
- print("邮件是否含有图片或附件:", fujian_list)
- print('图片或附件名字:', image_list)
- if not html_list and not plain_list:
- print("邮箱内无未读邮件")
- else:
- print('成功拿到邮件数据')
- # decide how to handle the message based on its type
# NOTE(review): every branch in this loop ends with `break`, so at most one
# entry is handled per pass; the lists are cleared after the loop.
- for i, v in enumerate(uid_list):
- print("邮件标题:", subject_list[i])
# HTML-only message: forward the HTML body.
- if html_list and not plain_list:
- print('此邮件仅含有html')
- reply_model = html_list[i]
- fajianren = from_list[i]
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
# Empty plain-text body: forward with an empty body.
- if plain_list[i] == '':
- print('此邮件无正文')
- reply_model = ''
- fajianren = from_list[i]
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
# No subject: forward using the placeholder subject '(无主题)'.
- if not subject_list[i]:
- print('此邮件无主题')
- if html_list and not plain_list:
- print('此邮件无主题且仅含有html')
- reply_model = html_list[i]
- fajianren = from_list[i]
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- biaoti = '(无主题)'
- NeteaseMail().sentemail(email_server, uid_list[i],reply_model, None, biaoti, fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
- elif plain_list:
- print('此邮件无主题且含有正文')
- reply_model = plain_list[i]
- fajianren = from_list[i]
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- biaoti = '(无主题)'
- NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, biaoti, fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
- # handle messages carrying images or attachments
- if fujian_list[i] == 'have':
- print('此邮件含有图片或附件或视频或压缩包')
- reply_model = html_list[i]
- image = image_list
- fajianren = from_list[i]
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- NeteaseMail().sentemail(email_server, uid_list[i], reply_model, image, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
# Wipe and recreate the attachment directory after forwarding.
- file_path = os.path.join(os.path.dirname(__file__) + '/../emailtrainfile/image/')
- shutil.rmtree(file_path)
- os.mkdir(file_path)
- print('-------------------------------------------------------------------------')
- break
- if fujian_list[i] == 'none':
- # handle messages without images or attachments
- # classify the message
# NOTE(review): the text printed here is split on two newlines, while the
# text that is length-checked and classified below is split on three.
- print("识别内容:", plain_list[i].split('\n\n')[0]) # plain
# Only texts shorter than 1000 characters are sent to the classifier.
- if len(plain_list[i].split('\n\n\n')[0]) < 1000:
- response1 = NeteaseMail().aws_login().classify_document(
- Text=plain_list[i].split('\n\n\n')[0],
- EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/lhq'
- # EndpointArn='arn:aws:comprehend:us-east-1:697864307463:document-classifier-endpoint/test'
- )
- # print(response.index(max(response['Classes']['Score'])))
# Collect the scores so the highest-scoring class can be picked by index.
- nums = []
- print('emailsort:')
- for cls in response1['Classes']:
- print(cls['Name'])
- nums.append(cls['Score'])
- print("识别种类及命中率:", response1['Classes'][nums.index(max(nums))])
- # when the confidence score is at least 0.95
- if response1['Classes'][nums.index(max(nums))]['Score'] >= 0.95:
- reply_sort = EmailSortModel.objects.filter(
- sort=response1['Classes'][nums.index(max(nums))]['Name']).values('autoreplymodel')
- if reply_sort.exists():
# Reply text = stored template, six newlines, then a quoted copy of the
# original message. NOTE(review): str(list) below embeds the list's bracket
# and quote characters in the reply text.
- reply_model = reply_sort[0]['autoreplymodel'] + '\n' + '\n' + '\n' + '\n' + '\n' + '\n' + \
- 'At' + email_body[i]['Date'] + str(from_list[i].split()[0:-1]) + \
- from_list[i].split()[-1] + \
- 'wrote:\n ' + plain_list[i]
- print(reply_model)
# NOTE(review): the reply text above uses split()[-1] for the address but
# this line uses split()[1]; they differ when the display name has more than
# one word.
- shoujianren = from_list[i].split()[1].replace('<', '').replace('>', '')
- NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i], ec.fromaddr, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
- else:
# Below the threshold: forward the HTML body instead of replying.
- print("此邮件命中率低于0.95")
- reply_model = html_list[i] # html
- fajianren = from_list[i]
- print(fujian_list)
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i],
- fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
- else:
# Text of 1000 characters or more is not classified; forward the HTML body.
- print("识别内容过长")
- reply_model = html_list[i] # html
- fajianren = from_list[i]
- print(fujian_list)
- shoujianren = 'liehaoquan2021@163.com' # the Zendesk mailbox address
- NeteaseMail().sentemail(email_server, uid_list[i], reply_model, None, subject_list[i], fajianren, shoujianren,
- ec.fromaddr, ec.password)
- print('-------------------------------------------------------------------------')
- break
# Reset all accumulators.
- subject_list.clear()
- from_list.clear()
- uid_list.clear()
- email_body.clear()
- fujian_list.clear()
- image_list.clear()
- plain_list.clear()
- html_list.clear()
- NeteaseMail().closeEmail(email_server)
- return response.json(0)
- # if __name__ == '__main__':
- # ComprehendAction().get_document_classifier()
|