机器翻译模型(来自 Hugging Face 官网:https://huggingface.co/Helsinki-NLP)
模型翻译 API 服务代码
# 离线翻译服务代码
# -*-coding:utf-8-*-
import json
import logging
import os
import warnings
from datetime import datetime
from logging.handlers import RotatingFileHandler

import requests
from flask import Flask, request, jsonify
from gevent import pywsgi
from transformers import pipeline, AutoModelForSeq2SeqLM, AutoModelWithLMHead, AutoTokenizer
warnings.filterwarnings('ignore')
# The logger is created before the try block so the top-level handler at the
# bottom can always report a startup failure.
# NOTE(review): 'requests' is also the logger name used by the requests
# library; it is kept unchanged so existing log output stays the same.
logger = logging.getLogger('requests')
logger.setLevel(logging.DEBUG)

try:
    print('\n 翻译服务\n')

    # Log levels by increasing severity: debug -> info -> warning -> error -> critical.
    # RotatingFileHandler does not create missing directories, so make sure
    # ./logs exists before opening the file (e.g. when no volume is mounted).
    os.makedirs('./logs', exist_ok=True)
    log_filename = './logs/' + datetime.now().strftime('%Y%m%d') + '.log'
    # Explicit UTF-8: request payloads are arbitrary Unicode text.
    file_handler = RotatingFileHandler(
        log_filename, maxBytes=1024 * 1024, backupCount=10, encoding='utf-8'
    )
    file_handler.setLevel(logging.DEBUG)
    formatter = logging.Formatter('%(asctime)s %(filename)s : %(levelname)s %(message)s')
    file_handler.setFormatter(formatter)
    logger.addHandler(file_handler)
    logger.info('启动翻译服务...')

    # Model downloads: https://huggingface.co/Helsinki-NLP
    # English -> Chinese.
    print('正在加载【英语-汉语】翻译模型...')
    logger.info('正在加载【英语-汉语】翻译模型...')
    model = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-en-zh")
    tokenizer = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-en-zh")
    translation = pipeline("translation_en_to_zh", model=model, tokenizer=tokenizer)
    # Example:
    #   translation("Student accommodation centres, resorts", max_length=40)[0]['translation_text']

    # Chinese -> English.
    print('正在加载【汉语-英语】翻译模型...')
    logger.info('正在加载【汉语-英语】翻译模型...')
    model_zh2en = AutoModelForSeq2SeqLM.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
    tokenizer_zh2en = AutoTokenizer.from_pretrained("Helsinki-NLP/opus-mt-zh-en")
    translation_zh2en = pipeline("translation_zh_to_en", model=model_zh2en, tokenizer=tokenizer_zh2en)

    # Maps the request's `model` form field to the pipeline that serves it.
    translators = {'en2zh': translation, 'zh2en': translation_zh2en}

    app = Flask(__name__)

    @app.route('/translater', methods=['POST'])
    def translate():
        """Translate the posted text with the requested model.

        Form fields:
            model: 'en2zh' (English to Chinese) or 'zh2en' (Chinese to English).
            text: the text to translate.

        Returns:
            JSON ``{'success': True, 'content': <translation>}`` on success, or
            ``{'success': False, 'msg': <reason>}`` on failure. A missing field
            or unsupported model answers with HTTP 400; an unexpected error
            answers with HTTP 500.
        """
        try:
            # Log the incoming request parameters.
            data = request.form.to_dict()
            json_data = json.dumps(data, ensure_ascii=False)
            logger.info(f'请求参数:{json_data}')

            mod = data.get('model')
            text = data.get('text')
            if mod is None or text is None:
                missing = 'model' if mod is None else 'text'
                return jsonify({'success': False, 'msg': f'missing required form field: {missing}'}), 400

            translator = translators.get(mod)
            if translator is None:
                # Previously this path left `result` unbound and crashed the request.
                return jsonify({'success': False, 'msg': f'unsupported model: {mod}'}), 400

            result = translator(text, max_length=10240)[0]['translation_text']
            if result:
                result_data = {'success': True, 'content': result}
            else:
                result_data = {'success': False, 'msg': 'fail'}
            return jsonify(result_data)
        except Exception as e:
            # Request boundary: record the traceback and answer with JSON.
            # The message is stringified because exception objects are not
            # JSON-serialisable.
            logger.exception('翻译请求处理失败')
            return jsonify({'success': False, 'msg': str(e)}), 500

    print('翻译服务已启动,请通过api形式访问该服务地址:http://127.0.0.1:16999/translater')
    logger.info('翻译服务已启动,请通过api形式访问该服务地址:http://127.0.0.1:16999/translater')
    server = pywsgi.WSGIServer(('0.0.0.0', 16999), app)
    server.serve_forever()
except Exception as e:
    # Top-level boundary: report the failure on stdout and in the log,
    # including the traceback.
    print(f'翻译服务异常:{e}')
    logger.exception(f'翻译服务异常:{e}')
安装依赖requirements.txt文件
torch
flask
gevent
transformers
sentencepiece
Dockerfile文件
# Base image: official Python 3.8.
FROM python:3.8
WORKDIR /app
# Port the translation API listens on.
EXPOSE 16999
# Install dependencies before copying the application code so this layer is
# reused from the build cache when only the code changes; --no-cache-dir keeps
# pip's download cache out of the image.
COPY requirements.txt /app/
RUN pip install --no-cache-dir -r requirements.txt
# Copy the published files into the container directory.
COPY . /app
ENTRYPOINT ["python", "Translate.py"]
构建docker镜像
# Build the image; --no-cache makes the build ignore previously cached layers.
docker build --no-cache --tag translate.api:latest --file Dockerfile .
# Run the container detached, publishing port 16999 and mounting the host log directory.
docker run --detach --publish 16999:16999 --volume /translate/logs:/app/logs translate.api:latest