模型
- 平台:https://huggingface.co/
- asr-语音转文本:pipeline("automatic-speech-recognition", model="openai/whisper-large-v3", device=0)
- tts-文本转语音:pipeline("text-to-speech", "microsoft/speecht5_tts", device=0)
- 文本语言识别:pipeline("text-classification", model="papluca/xlm-roberta-base-language-detection", device=0)
- 文本翻译--zh-en:
- pipeline("translation", model="Helsinki-NLP/opus-mt-en-zh", device=0, torch_dtype=torch_dtype)
- pipeline("translation", model="Helsinki-NLP/opus-mt-zh-en", device=0, torch_dtype=torch_dtype)
- 流程
- 网页端提交:将当前批次、文本或音频 base64 数据提交给 python-flask 后端,后端据此生成一个待处理任务(保存到 mongo)
- 后端循环处理要处理的任务
- 网页端查询已处理好的任务--批次
代码
## 接口
from flask import Flask, request
from flask_cors import CORS
import time
import json
from datetime import datetime
import mongo_util
import audio_message_util as amutil
import audio_util as autil
# Flask application object; the route handlers in this file register on it.
app = Flask(__name__)
# Enable CORS on the app using flask_cors defaults (no options are passed).
CORS(app)
# Database handles created once at import time and shared by every handler.
client = mongo_util.get_client()
# NOTE(review): the database name passed here is an empty string — presumably
# redacted or defaulted inside mongo_util.get_db; confirm before deploying.
db = mongo_util.get_db(client, "")
class DateTimeEncoder(json.JSONEncoder):
    """JSON encoder that serializes ``datetime`` values as ISO 8601 strings."""

    def default(self, obj):
        """Return ``obj.isoformat()`` for datetimes; defer all else to the base class."""
        if not isinstance(obj, datetime):
            # The base implementation raises TypeError for unsupported types.
            return super().default(obj)
        return obj.isoformat()
@app.route('/audio/totxt', methods=['POST'])
def totxt():
    """Queue a task for an uploaded audio recording.

    Reads ``data`` (base64 audio) and ``batch`` from the JSON request body,
    passes the data plus a ``webm/<batch>/<ts>.webm`` source path and a
    ``mp3/<batch>/<ts>.mp3`` output path to ``autil.base64_tomp3``, then
    saves a task record with ``type`` 1 and ``status`` 1 via ``amutil.save``.

    Returns:
        ``'ok'`` once the task is saved, or ``('invalid batch', 400)`` when
        ``batch`` is not a string or contains a ``..`` path component.

    Raises:
        KeyError: if ``data`` or ``batch`` is missing from the JSON body.
    """
    payload = request.json
    base64data = payload['data']
    batch = payload['batch']

    # ``batch`` is client-supplied and becomes a directory name, so refuse
    # parent-directory components that would escape the webm/ and mp3/ trees.
    if not isinstance(batch, str) or '..' in batch.replace('\\', '/').split('/'):
        return 'invalid batch', 400

    # Timestamp-based file stem; the dot is replaced so the name has a single
    # extension separator.
    stem = str(time.time()).replace('.', '_')
    filename = 'webm/' + batch + '/' + stem + '.webm'
    # Build the output path explicitly rather than replacing every "webm"
    # substring, which would also rewrite a batch id that contains "webm".
    out_file = 'mp3/' + batch + '/' + stem + '.mp3'

    autil.base64_tomp3(base64data, filename, out_file)
    print(datetime.now(), batch, filename, out_file)

    task = {
        'batch': batch,
        'original_path': filename,
        'audio_path': out_file,
        'type': 1,
        'status': 1,
    }
    amutil.save(db, task)
    return 'ok'
@app.route('/txt/toaudio', methods=['POST'])
def toaudio():
    """Queue a task for a submitted piece of text.

    Reads ``data`` (the text) and ``batch`` from the JSON request body and
    saves a task record with ``type`` 2 and ``status`` 1 via ``amutil.save``.

    Returns:
        The string ``'ok'``.
    """
    payload = request.json
    text = payload['data']
    batch = payload['batch']
    print(datetime.now(), batch, text)
    task = {'batch': batch, 'original_text': text, 'type': 2, 'status': 1}
    amutil.save(db, task)
    return 'ok'
@app.route('/audio/gettxt', methods=['POST'])
def gettxt():
    """Return the ``status`` 2 tasks of a batch as a JSON array.

    Reads ``batch`` from the JSON request body and fetches matching records
    with ``amutil.get``. Records that carry an audio path get an extra
    ``audiourl`` field: a data URL built from ``autil.get_audio_base64``.
    Type 2 records use ``audiofile``; type 1 records use ``audio_path``.

    Returns:
        A JSON string; ``datetime`` values are encoded as ISO 8601 strings.

    Raises:
        KeyError: if ``batch`` is missing from the JSON body.
    """
    batch = request.json['batch']
    # NOTE(review): type 1 paths end in ".mp3" yet are labelled audio/webm;
    # the prefix is kept unchanged here — confirm what the client expects.
    url_prefix = 'data:audio/webm;codecs=opus;base64,'

    results = []
    for c in amutil.get(db, {'status': 2, 'batch': batch}):
        # Use .get() for both task types so a record without its audio field
        # is returned as-is instead of failing the whole request.
        if c['type'] == 2:
            audio_path = c.get('audiofile')
        elif c['type'] == 1:
            audio_path = c.get('audio_path')
        else:
            audio_path = None

        if audio_path is not None:
            c['audiourl'] = url_prefix + autil.get_audio_base64(audio_path)
        results.append(c)

    return json.dumps(results, cls=DateTimeEncoder)
if __name__ == '__main__':
    # Entry point when the file is run as a script. The port is passed as an
    # int, matching Flask.run's documented parameter type.
    # NOTE(review): debug=True enables the interactive debugger and reloader;
    # do not expose this mode outside local development.
    app.run(debug=True, port=8080)
结果