k2-fsa/sherpa-ncnn:在没有互联网连接的情况下使用带有 ncnn 的下一代 Kaldi 进行实时语音识别。支持iOS、Android、Raspberry Pi、VisionFive2、LicheePi4A等。 (github.com)
如果是PC端可以直接使用ssssssilver大佬的 https://github.com/ssssssilver/sherpa-ncnn-unity.git
我这边要折腾的是WebGL版本的,所以修改了一番
1、WebSocket:客户端使用了 psygames/UnityWebSocket(The Best Unity WebSocket Plugin for All Platforms,github.com)
using System;
using System.Collections.Generic;
using System.Runtime.InteropServices;
using System.Text;
using UnityEngine;
using UnityEngine.UI;
using UnityWebSocket;
/// <summary>
/// Unity component that streams microphone samples to a speech-recognition
/// WebSocket server and appends the recognized text it receives to a UI label.
/// </summary>
public class uSherpaWebGL : MonoBehaviour
{
    IWebSocket ws;

    /// <summary>UI label that received recognition text is appended to.</summary>
    public Text text;

    // Messages received from the socket, waiting to be shown by Update().
    Queue<string> msgs = new Queue<string>();

    // Start is called before the first frame update
    void Start()
    {
        ws = new WebSocket("ws://127.0.0.1:9999");
        ws.OnOpen += OnOpen;
        ws.OnMessage += OnMessage;
        ws.OnError += OnError;
        ws.OnClose += OnClose;
        ws.ConnectAsync();
    }

    // Update is called once per frame
    void Update()
    {
        // Drain everything that arrived since the last frame so the label
        // never lags behind the server when several messages arrive together.
        while (msgs.Count > 0)
        {
            text.text += msgs.Dequeue();
        }
    }

    /// <summary>
    /// Entry point for captured audio; forwards the samples to the server.
    /// </summary>
    /// <param name="input">Audio samples as 32-bit floats. Null or empty input is ignored.</param>
    public void OnData(float[] input)
    {
        if (input == null || input.Length == 0)
        {
            return;
        }

        Debug.Log("input.Length:" + input.Length);
        SendData(input);
    }

    /// <summary>
    /// Sends the raw bytes of <paramref name="input"/> as one binary WebSocket message.
    /// Nothing is sent (and nothing is allocated) unless the socket is open.
    /// </summary>
    void SendData(float[] input)
    {
        if (input == null || input.Length == 0)
        {
            return;
        }

        if (ws == null || ws.ReadyState != WebSocketState.Open)
        {
            return;
        }

        // Buffer.BlockCopy copies the raw bytes of a primitive array safely.
        // The previous Marshal.UnsafeAddrOfPinnedArrayElement call required the
        // array to be pinned, which it was not.
        var payload = new byte[Buffer.ByteLength(input)];
        Buffer.BlockCopy(input, 0, payload, 0, payload.Length);
        ws.SendAsync(payload);
    }

    void OnOpen(object sender, OpenEventArgs e)
    {
        Debug.Log("WS connected!");
    }

    /// <summary>
    /// Queues binary messages (decoded as UTF-8) for display; text frames are ignored.
    /// </summary>
    void OnMessage(object sender, MessageEventArgs e)
    {
        if (e.IsBinary)
        {
            string str = Encoding.UTF8.GetString(e.RawData);
            Debug.Log("WS received message: " + str);
            msgs.Enqueue(str);
        }
        else if (e.IsText)
        {
            // Text frames are intentionally ignored; this handler only consumes binary frames.
        }
    }

    void OnError(object sender, ErrorEventArgs e)
    {
        Debug.Log("WS error: " + e.Message);
    }

    void OnClose(object sender, CloseEventArgs e)
    {
        Debug.Log(string.Format("Closed: StatusCode: {0}, Reason: {1}", e.StatusCode, e.Reason));
    }

    private void OnApplicationQuit()
    {
        if (ws != null && ws.ReadyState != WebSocketState.Closed)
        {
            ws.CloseAsync();
        }
    }

    // Also tear down when the component is destroyed (scene change, object
    // removal) so the handlers do not keep this instance alive and the socket
    // is not left open.
    private void OnDestroy()
    {
        if (ws == null)
        {
            return;
        }

        ws.OnOpen -= OnOpen;
        ws.OnMessage -= OnMessage;
        ws.OnError -= OnError;
        ws.OnClose -= OnClose;

        if (ws.ReadyState != WebSocketState.Closed)
        {
            ws.CloseAsync();
        }

        ws = null;
    }
}
服务器端使用了Fleck
// See https://aka.ms/new-console-template for more information
using Fleck;
using System.Text;
namespace uSherpaServer
{
    /// <summary>
    /// Console host that accepts float audio samples over a WebSocket, feeds
    /// them to a sherpa-ncnn online recognizer, and sends recognized text
    /// back to the connected client as UTF-8 binary messages.
    /// </summary>
    internal class Program
    {
        // Recognizer and the single online stream shared by all incoming audio.
        static SherpaNcnn.OnlineRecognizer recognizer;
        static SherpaNcnn.OnlineStream onlineStream;
        static string tokensPath = "tokens.txt";
        static string encoderParamPath = "encoder_jit_trace-pnnx.ncnn.param";
        static string encoderBinPath = "encoder_jit_trace-pnnx.ncnn.bin";
        static string decoderParamPath = "decoder_jit_trace-pnnx.ncnn.param";
        static string decoderBinPath = "decoder_jit_trace-pnnx.ncnn.bin";
        static string joinerParamPath = "joiner_jit_trace-pnnx.ncnn.param";
        static string joinerBinPath = "joiner_jit_trace-pnnx.ncnn.bin";
        static int numThreads = 1;
        static string decodingMethod = "greedy_search";
        static string modelPath;
        static float sampleRate = 16000;

        // Connection that results are sent to. Written from the WebSocket
        // callbacks and read from the decode loop, hence volatile.
        static volatile IWebSocketConnection client;

        // Serializes every use of recognizer/onlineStream: audio is pushed from
        // the WebSocket callbacks while the decode loop runs on the main thread.
        // NOTE(review): the library's thread-safety is not visible here, so
        // access is serialized defensively.
        static readonly object streamLock = new object();

        static void Main(string[] args)
        {
            // The model folder is resolved against the current working directory,
            // so it has to be copied there (normally next to the executable).
            modelPath = Environment.CurrentDirectory + "/sherpa-ncnn-streaming-zipformer-small-bilingual-zh-en-2023-02-16";

            // Build the recognizer configuration.
            SherpaNcnn.OnlineRecognizerConfig config = new SherpaNcnn.OnlineRecognizerConfig
            {
                FeatConfig = { SampleRate = sampleRate, FeatureDim = 80 },
                ModelConfig = {
                    Tokens = Path.Combine(modelPath, tokensPath),
                    EncoderParam = Path.Combine(modelPath, encoderParamPath),
                    EncoderBin = Path.Combine(modelPath, encoderBinPath),
                    DecoderParam = Path.Combine(modelPath, decoderParamPath),
                    DecoderBin = Path.Combine(modelPath, decoderBinPath),
                    JoinerParam = Path.Combine(modelPath, joinerParamPath),
                    JoinerBin = Path.Combine(modelPath, joinerBinPath),
                    UseVulkanCompute = 0,
                    NumThreads = numThreads
                },
                DecoderConfig = {
                    DecodingMethod = decodingMethod,
                    NumActivePaths = 4
                },
                EnableEndpoint = 1,
                Rule1MinTrailingSilence = 2.4F,
                Rule2MinTrailingSilence = 1.2F,
                Rule3MinUtteranceLength = 20.0F
            };

            // Create the recognizer and its online stream.
            recognizer = new SherpaNcnn.OnlineRecognizer(config);
            onlineStream = recognizer.CreateStream();

            StartWebServer();
            Update(); // Runs the decode loop forever; the line below is never reached.
            Console.ReadLine();
        }

        /// <summary>
        /// Starts the WebSocket server on ws://127.0.0.1:9999 and wires the
        /// per-connection open/close/binary handlers.
        /// </summary>
        static void StartWebServer()
        {
            // Open connections; also used as the lock guarding itself and `client`.
            var connectSocketPool = new List<IWebSocketConnection>();

            // Listen on local port 9999.
            var server = new WebSocketServer("ws://127.0.0.1:9999");

            server.Start(socket =>
            {
                // A client connected: it becomes the target for results.
                socket.OnOpen = () =>
                {
                    lock (connectSocketPool)
                    {
                        connectSocketPool.Add(socket);
                        client = socket;
                    }
                    Console.WriteLine("Open");
                };

                // A client disconnected: only replace `client` if it was the one
                // that closed, otherwise an unrelated disconnect would silence
                // the connection that is still active.
                socket.OnClose = () =>
                {
                    lock (connectSocketPool)
                    {
                        connectSocketPool.Remove(socket);
                        if (ReferenceEquals(client, socket))
                        {
                            client = connectSocketPool.Count > 0
                                ? connectSocketPool[connectSocketPool.Count - 1]
                                : null;
                        }
                    }
                    Console.WriteLine("Close");
                };

                // Binary payload = raw 32-bit float samples.
                socket.OnBinary = message =>
                {
                    // Copy whole floats only; a trailing partial sample would
                    // otherwise make BlockCopy throw.
                    int sampleCount = message.Length / sizeof(float);
                    if (sampleCount == 0)
                    {
                        return;
                    }

                    float[] floatArray = new float[sampleCount];
                    Buffer.BlockCopy(message, 0, floatArray, 0, sampleCount * sizeof(float));

                    // Hand the captured audio to the recognizer.
                    lock (streamLock)
                    {
                        onlineStream.AcceptWaveform(sampleRate, floatArray);
                    }
                };
            });
        }

        // Text already sent for the current utterance; "" at the start of an utterance.
        static string lastText = "";

        /// <summary>
        /// Decode loop: every 200 ms decodes all pending audio, sends the newly
        /// recognized suffix to the client, and on an endpoint sends "。" and
        /// resets the stream for the next utterance. Never returns.
        /// </summary>
        static void Update()
        {
            while (true)
            {
                string text;
                bool isEndpoint;

                lock (streamLock)
                {
                    // Decode everything that is ready, not just one step per tick.
                    while (recognizer.IsReady(onlineStream))
                    {
                        recognizer.Decode(onlineStream);
                    }

                    text = recognizer.GetResult(onlineStream).Text;
                    isEndpoint = recognizer.IsEndpoint(onlineStream);
                    if (isEndpoint)
                    {
                        recognizer.Reset(onlineStream);
                        //Console.WriteLine("Reset");
                    }
                }

                // Snapshot once: a callback may set `client` to null at any time.
                IWebSocketConnection target = client;
                bool hasText = !string.IsNullOrWhiteSpace(text);

                if (hasText && lastText != text)
                {
                    // Send only what was appended since the last message. If the
                    // recognizer revised earlier text, fall back to the full text.
                    // (string.Replace would also strip repeats inside the new part.)
                    string delta = text.StartsWith(lastText, StringComparison.Ordinal)
                        ? text.Substring(lastText.Length)
                        : text;

                    if (target != null)
                    {
                        target.Send(Encoding.UTF8.GetBytes(delta));
                        //Console.WriteLine("text1:" + text);
                    }
                    lastText = text;
                }

                if (isEndpoint)
                {
                    if (hasText && target != null)
                    {
                        target.Send(Encoding.UTF8.GetBytes("。"));
                        // Console.WriteLine("text2:" + text);
                    }

                    // Start the next utterance from scratch so that repeating the
                    // same sentence is not mistaken for "nothing changed".
                    lastText = "";
                }

                Thread.Sleep(200); // ms
            }
        }
    }
}
2、Unity录音插件使用了uMicrophoneWebGL 绑定DataEvent事件实时获取话筒数据(float数组)
最后放上工程地址
客户端 uSherpa:fork 自 https://github.com/ssssssilver/sherpa-ncnn-unity.git ,改成了 Unity WebGL 版
服务器端 GitHub - xue-fei/uSherpaServer: uSherpaServer 给Unity提供流式语音识别的websocket服务