调用讯飞星火语音唤醒-新版windowsSDK
先贴一张在Unity中、Windows系统下成功调用新版讯飞windowsSDK的运行截图
为什么要用讯飞的语音唤醒?
项目中需要在unity和win系统下进行语音唤醒开启语音对话,而语音唤醒比较成熟的方案大多都是在linux系统下的,如snowboy,还有就是安卓系统的语音唤醒(各大厂都有)。win系统的就很少,我了解到的也只有讯飞有在做win系统的语音唤醒,如果从linux和安卓将语音唤醒功能移植到win下,不知道要花费多少时间。因此选择了讯飞
为什么要用这个新版的windowsSDK?
- 新版便宜。在官方费用上可以看到,旧版的低消为8000(最近618活动打折到了5600);而新版的低消为2500,打折后为1750。
- 唤醒词设置方便。旧版需要在讯飞的官网上设置唤醒词,然后下载整个sdk,再将sdk中的 wakeupresource.jet 文件替换,才能完成唤醒词的修改,并且如果你的语音唤醒过期了,它还会提示你无法下载…;新版的sdk经过测试,唤醒词是以txt文本的方式写在本地的,也就意味着你直接在本地修改txt文件就可以完成唤醒词的修改,极大地减轻了开发人员的额外工作。
- 与时俱进,新版肯定较旧版而言更有优势才会出新版。
然而旧版的语音唤醒也还是有它的优势的
- 网上已经有很多调用旧版sdk的教程,跟着写就可以实现windowsSDK语音唤醒(是的我就是其中之一),无需将时间耗费在亲自写代码去调用dll中提供的api等各种繁琐而且有难度的事情上。官方的windowsSDK是dll的形式,如果不懂c++以及C#调用dll的代码的话,自己写会十分痛苦和消耗时间;新版的由于推出较晚,使用的人不多,网上相关的教程更是基本没有,问客服也是一问三不知,我手撸了差不多一个星期才实现了基本的调用。
- 旧版的语音唤醒和语音识别是连在一起的,唤醒搞定之后,识别和合成等的调用流程都是一样的,不需要太多额外的工作
- 新版的sdk在c++环境下调用十分快速,但在dll调用中则困难重重
大家可以权衡利弊,选择合适自己项目的语音唤醒(讯飞原版及新版)
在讯飞官网中下载新版的源码后,可以在visual studio中试运行,看看体验如何,官方的demo中是提供了读取本地音频进行唤醒识别和实时通过麦克风接收到的数据进行唤醒识别两个选项
时间关系,先直接贴上调用成功的代码
/// <summary>
/// Unity component that drives the iFlytek AIKIT wake-word ability through P/Invoke calls
/// into the native <c>AEE_lib</c> library. On <c>Start</c> it reads a WAV file from
/// StreamingAssets and passes the bytes to <see cref="wakeup"/>.
/// </summary>
public class MSCDLL : MonoBehaviour
{
    private const string _ability = "e867a88f2"; // Default ability id; do not change it
    private const string _appID = "在官网上申请的应用的appid";
    private const string _apiSecret = "秘钥";
    private const string _apiKey = "key";
    private const string TAG = "MSCDLL";

    // Return code of the most recent native call; 0 is reported as "success" in the logs.
    int _dll_result = 0;

    // Session handle written by AIKIT_Start; AIKIT_Write and AIKIT_End need the same session.
    IntPtr _akikt_handle = IntPtr.Zero;

    // The delegates are marshalled to native function pointers. They are kept in fields so the
    // garbage collector cannot reclaim them while the SDK may still invoke the callbacks.
    private AIKIT_OnOutput _onOutput;
    private AIKIT_OnEventDelegate _onEvent;
    private AIKIT_OnErrorDelegate _onError;

    // Unmanaged copy of the wake-word file path given to AIKIT_LoadData. It is released at the
    // start of the next wakeup() call, right after AIKIT_UnInit.
    // NOTE(review): whether the SDK keeps this pointer after AIKIT_LoadData is not visible here,
    // which is why the buffer is not freed any earlier.
    private IntPtr _keywordPathPtr = IntPtr.Zero;

    // Start is called before the first frame update
    void Start()
    {
        byte[] bytes = ReadWavFile($"{Application.streamingAssetsPath}/output.wav");
        // Test path: read a local audio file and hand it straight to the wake-up API.
        wakeup(bytes);
    }

    /// <summary>
    /// Runs one complete wake-word detection pass over an in-memory audio buffer:
    /// un-initialises and re-initialises the SDK, registers the callbacks, loads the wake-word
    /// file, starts a session, writes the audio and ends the session.
    /// </summary>
    /// <param name="bytes">Audio bytes to check for the wake word.</param>
    /// <exception cref="ArgumentNullException"><paramref name="bytes"/> is null.</exception>
    public void wakeup(byte[] bytes)
    {
        // Fail before touching the SDK instead of half-way through the native call sequence.
        if (bytes == null)
        {
            throw new ArgumentNullException(nameof(bytes));
        }

        // Unload the SDK and release its resources.
        _dll_result = AIKIT_UnInit();
        LogStep("AIKIT_UnInit");

        // The path buffer of the previous run can be released once the SDK has been unloaded.
        if (_keywordPathPtr != IntPtr.Zero)
        {
            Marshal.FreeHGlobal(_keywordPathPtr);
            _keywordPathPtr = IntPtr.Zero;
        }

        // Initialise the SDK with the application credentials.
        AIKIT_InitParam param = new AIKIT_InitParam
        {
            AppID = _appID,
            ApiSecret = _apiSecret,
            ApiKey = _apiKey,
            AuthType = 0
        };
        _dll_result = AIKIT_Init(param);
        LogStep("AIKIT_InitParam");

        // Register the callbacks (the delegates are created once and kept alive in fields).
        if (_onOutput == null)
        {
            _onOutput = new AIKIT_OnOutput(OnOutput);
        }
        if (_onEvent == null)
        {
            _onEvent = new AIKIT_OnEventDelegate(OnEvent);
        }
        if (_onError == null)
        {
            _onError = new AIKIT_OnErrorDelegate(OnError);
        }
        AIKIT_Callbacks callbacks = new AIKIT_Callbacks
        {
            outputCB = _onOutput,
            eventCB = _onEvent,
            errorCB = _onError
        };
        _dll_result = AIKIT_RegisterAbilityCallback(_ability, callbacks);
        LogStep("AIKIT_RegisterAbilityCallback");

        // Initialise the wake-word engine. The result is stored in _dll_result so the log line
        // below reports this call rather than the previous one.
        AIKIT_BaseParam engintParam = null;
        _dll_result = AIKIT_EngineInit(_ability, engintParam);
        LogStep("AIKIT_EngineInit");

        // Load the resource the ability needs: the path of the wake-word text file.
        string value = (Application.streamingAssetsPath + "\\xbxb.txt").Replace("/", "\\").Replace("\\", "\\\\");
        _keywordPathPtr = Marshal.StringToHGlobalAnsi(value);
        AIKIT_CustomData loadData = new AIKIT_CustomData("key_word", _keywordPathPtr, 2);
        _dll_result = AIKIT_LoadData(_ability, loadData);
        LogStep("AIKIT_LoadData");

        // Select the wake-word data set to use; this has to be called before every session.
        _dll_result = AIKIT_SpecifyDataSet(_ability, "key_word", new int[] { 0 }, 1);
        LogStep("AIKIT_SpecifyDataSet");

        // Build the session parameters.
        // NOTE(review): the builder handles created in this method are never released here;
        // check whether the SDK exposes a matching destroy call.
        IntPtr aikit_handle_ptr = AIKITBuilder_Create(BuilderType.BUILDER_TYPE_PARAM);
        AIKITBuilderHandle aikit_param_handle = (AIKITBuilderHandle)Marshal.PtrToStructure(aikit_handle_ptr, typeof(AIKITBuilderHandle));
        // Per the original author, leaving out "gramLoad" makes the run fail with an error.
        _dll_result = AIKITBuilder_AddBool(aikit_param_handle, "gramLoad", true);
        // Optional (per the original author): a threshold can also be supplied through
        // AIKITBuilder_AddString with key "wdec_param_nCmThreshold" and value "0 0:999".
        IntPtr param_ptr = AIKITBuilder_BuildParam(aikit_param_handle);
        AIKIT_BaseParam aikit_param = (AIKIT_BaseParam)Marshal.PtrToStructure(param_ptr, typeof(AIKIT_BaseParam));
        LogStep("AIKITBuilder_BuildParam");

        // Start the session. The returned handle carries the session information and must be the
        // one passed to AIKIT_Write and AIKIT_End. It is cleared first so a failed start cannot
        // leave the handle of an earlier, already ended session in place.
        _akikt_handle = IntPtr.Zero;
        _dll_result = AIKIT_Start(_ability, aikit_param, IntPtr.Zero, ref _akikt_handle);
        LogStep("AIKIT_Start", true);
        if (_akikt_handle == IntPtr.Zero)
        {
            Debug.LogError($"{TAG}>>>AIKIT_Start>>>no session handle returned, aborting");
            return;
        }

        // Wrap the audio bytes in a data builder and pass them to the native library.
        IntPtr aikit_data_handle_ptr = AIKITBuilder_Create(BuilderType.BUILDER_TYPE_DATA);
        AIKITBuilderHandle aikit_data_handle = (AIKITBuilderHandle)Marshal.PtrToStructure(aikit_data_handle_ptr, typeof(AIKITBuilderHandle));
        BuilderData builderData = new BuilderData
        {
            type = (int)BuilderDataType.DATA_TYPE_AUDIO,
            name = "wav",
            status = 0,
            data = bytes,
            len = bytes.Length
        };
        _dll_result = AIKITBuilder_AddBuf(aikit_data_handle, builderData);
        LogStep("AIKITBuilder_AddBuf", true);

        // Ask the library for the input-data object built from the buffer and copy it into a
        // managed AIKIT_BaseData.
        IntPtr data_ptr = AIKITBuilder_BuildData(aikit_data_handle);
        AIKIT_BaseData input_data = (AIKIT_BaseData)Marshal.PtrToStructure(data_ptr, typeof(AIKIT_BaseData));

        // Feed the input data to the session to run the wake-word detection, then end the session.
        AIKIT_HANDLE aikit_handle = (AIKIT_HANDLE)Marshal.PtrToStructure(_akikt_handle, typeof(AIKIT_HANDLE));
        _dll_result = AIKIT_Write(aikit_handle, input_data);
        LogStep("AIKIT_Write", true);
        _dll_result = AIKIT_End(aikit_handle);
        LogStep("AIKIT_End", true);
    }

    /// <summary>
    /// Logs the outcome of the most recent native call as "success" (return code 0) or "fail".
    /// </summary>
    /// <param name="step">Label written into the log line.</param>
    /// <param name="includeCode">When true, the raw return code is appended to the line.</param>
    private void LogStep(string step, bool includeCode = false)
    {
        string status = _dll_result == 0 ? "success" : "fail";
        if (includeCode)
        {
            Debug.Log($"{TAG}>>>{step}>>>{status}>>>{_dll_result}");
        }
        else
        {
            Debug.Log($"{TAG}>>>{step}>>>{status}");
        }
    }

    // Update is called once per frame
    void Update()
    {
    }

    /// <summary>
    /// Unloads the SDK and releases its resources.
    /// </summary>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_UnInit();

    /// <summary>
    /// Initialises the SDK.
    /// </summary>
    /// <param name="data">Initialisation parameters (credentials, authorisation mode, ...).</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_Init(AIKIT_InitParam data);

    /// <summary>
    /// Initialises the engine.
    /// </summary>
    /// <param name="appid">Ability id (<see cref="wakeup"/> passes <c>_ability</c>).</param>
    /// <param name="param">Engine parameters; <see cref="wakeup"/> passes null.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    // The calling convention is spelled out like on the sibling imports; on Windows it matches
    // the DllImport default.
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_EngineInit(string appid, AIKIT_BaseParam param);

    /// <summary>
    /// Unloads the engine and releases its resources.
    /// </summary>
    /// <param name="appid">Ability id.</param>
    /// <returns>Native return code.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_EngineUnInit(string appid);

    /// <summary>
    /// Registers the callbacks.
    /// </summary>
    /// <param name="appid">Ability id (<see cref="wakeup"/> passes <c>_ability</c>).</param>
    /// <param name="param">Output, event and error callbacks.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_RegisterAbilityCallback(string appid, AIKIT_Callbacks param);

    /// <summary>
    /// Loads custom data for the ability; <see cref="wakeup"/> uses it for the wake-word file path.
    /// </summary>
    /// <param name="appid">Ability id (<see cref="wakeup"/> passes <c>_ability</c>).</param>
    /// <param name="param">Custom data entry to load.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_LoadData(string appid, AIKIT_CustomData param);

    /// <summary>
    /// Selects the wake-word data set to use.
    /// </summary>
    /// <param name="appid">Ability id (<see cref="wakeup"/> passes <c>_ability</c>).</param>
    /// <param name="key">Data key; <see cref="wakeup"/> passes "key_word".</param>
    /// <param name="index">Indices of the data sets to use.</param>
    /// <param name="count">Number of entries in <paramref name="index"/>.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_SpecifyDataSet(string appid, string key, int[] index, int count);

    /// <summary>
    /// Creates a builder handle.
    /// </summary>
    /// <param name="builderType">Kind of builder to create (parameter or data).</param>
    /// <returns>Pointer that <see cref="wakeup"/> reads as an <c>AIKITBuilderHandle</c>.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr AIKITBuilder_Create(BuilderType builderType);

    /// <summary>
    /// Adds a bool parameter through a builder handle.
    /// </summary>
    /// <param name="handle">Builder handle.</param>
    /// <param name="key">Parameter name.</param>
    /// <param name="value">Parameter value.</param>
    /// <returns>Native return code.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKITBuilder_AddBool(AIKITBuilderHandle handle, string key, bool value);

    /// <summary>
    /// Adds a string parameter through a builder handle.
    /// </summary>
    /// <param name="handle">Builder handle.</param>
    /// <param name="key">Parameter name.</param>
    /// <param name="value">Parameter value.</param>
    /// <param name="len">Length of <paramref name="value"/>.</param>
    /// <returns>Native return code.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKITBuilder_AddString(AIKITBuilderHandle handle, string key, string value, int len);

    /// <summary>
    /// Adds a <c>BuilderData</c> object, which carries the byte data, through a builder handle.
    /// </summary>
    /// <param name="handle">Builder handle.</param>
    /// <param name="builderData">Data segment to add.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKITBuilder_AddBuf(AIKITBuilderHandle handle, BuilderData builderData);

    /// <summary>
    /// Turns the parameters added to the builder into a parameter object.
    /// </summary>
    /// <param name="handle">Builder handle.</param>
    /// <returns>Pointer that <see cref="wakeup"/> reads as an <c>AIKIT_BaseParam</c>.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr AIKITBuilder_BuildParam(AIKITBuilderHandle handle);

    /// <summary>
    /// Turns the data added to the builder into an input-data object.
    /// </summary>
    /// <param name="handle">Builder handle.</param>
    /// <returns>Pointer that <see cref="wakeup"/> reads as an <c>AIKIT_BaseData</c>.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern IntPtr AIKITBuilder_BuildData(AIKITBuilderHandle handle);

    /// <summary>
    /// Starts the wake-up session.
    /// </summary>
    /// <param name="appid">Ability id (<see cref="wakeup"/> passes <c>_ability</c>).</param>
    /// <param name="param">Session parameters.</param>
    /// <param name="userContext">User context pointer; <see cref="wakeup"/> passes zero.</param>
    /// <param name="outHandle">Receives the session handle.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_Start(string appid, AIKIT_BaseParam param, IntPtr userContext, ref IntPtr outHandle);

    /// <summary>
    /// Passes the data to be checked for the wake word to a session.
    /// </summary>
    /// <param name="handle">Session handle obtained from <see cref="AIKIT_Start"/>.</param>
    /// <param name="input">Input data to check.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_Write(AIKIT_HANDLE handle, AIKIT_BaseData input);

    /// <summary>
    /// Ends a session.
    /// </summary>
    /// <param name="handle">Session handle obtained from <see cref="AIKIT_Start"/>.</param>
    /// <returns>Native return code; the callers treat 0 as success.</returns>
    [DllImport("AEE_lib", CallingConvention = CallingConvention.StdCall)]
    public static extern Int32 AIKIT_End(AIKIT_HANDLE handle);

    /// <summary>
    /// Reads a whole file into memory.
    /// </summary>
    /// <param name="filePath">Path of the file to read.</param>
    /// <returns>All bytes of the file.</returns>
    /// <exception cref="FileNotFoundException">The file does not exist.</exception>
    public static byte[] ReadWavFile(string filePath)
    {
        // Make sure the file exists; log the path to make a wrong location easy to spot.
        if (!File.Exists(filePath))
        {
            Debug.Log(filePath);
            throw new FileNotFoundException("文件未找到", filePath);
        }

        // Read every byte of the file.
        return File.ReadAllBytes(filePath);
    }

    /// <summary>
    /// Output callback: logs the ability id read from the handle and a success message.
    /// </summary>
    /// <param name="handle">Pointer read as an <c>AIKIT_HANDLE</c>.</param>
    /// <param name="output">Output data list; its contents are not decoded here.</param>
    private void OnOutput(IntPtr handle, AIKIT_BaseDataList output)
    {
        AIKIT_HANDLE output_handle = (AIKIT_HANDLE)Marshal.PtrToStructure(handle, typeof(AIKIT_HANDLE));
        Debug.Log(output_handle.abilityID);
        Debug.Log("唤醒成功!");
    }

    /// <summary>
    /// Event callback: logs the raw event value pointer.
    /// </summary>
    private void OnEvent(IntPtr handle, int eventType, IntPtr eventValue)
    {
        Debug.Log(eventValue);
    }

    /// <summary>
    /// Error callback: logs the raw description pointer.
    /// </summary>
    private void OnError(IntPtr handle, int err, IntPtr desc)
    {
        Debug.Log(desc);
    }
}
/// <summary>
/// Initialisation parameters passed to <c>AIKIT_Init</c>. The sequential layout keeps the fields
/// in declaration order when the object is marshalled to native code.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_InitParam
{
public int AuthType; // Authorisation mode: 0 = device-level, 1 = application-level
public string AppID; // Application id
public string ApiKey; // Application key
public string ApiSecret; // Application secret
public string WorkDir; // SDK working directory; needs read and write permission
public string ResDir; // Directory of read-only resources; needs read permission
public string LicenseFile; // Path of the licence file for offline activation; when empty, the first activation is done online
public string BatchID; // Licence batch
public string UDID; // User-defined device identifier
public string CfgFile; // Configuration file path, including the file name
}
/// <summary>
/// Managed mirror of a native parameter node, laid out sequentially. It is read from the pointer
/// returned by <c>AIKITBuilder_BuildParam</c> and passed to <c>AIKIT_Start</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_BaseParam
{
public IntPtr next; // Linked-list pointer; IntPtr stands in for void*
public IntPtr key; // Pointer; IntPtr stands in for const char*
public IntPtr value; // Pointer; IntPtr stands in for void*
public IntPtr reserved; // Reserved field; IntPtr stands in for void*
public int len; // Data length
public int type; // Variable type
}
/// <summary>
/// Custom data entry handed to <c>AIKIT_LoadData</c>. The sequential layout keeps the fields in
/// declaration order when the object is marshalled to native code.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_CustomData
{
    public IntPtr Next;
    public byte[] KeyBytes;
    public IntPtr Value;
    public IntPtr Reserved;
    public int Index;
    public int Len;
    public int From;

    /// <summary>
    /// Creates an entry whose key is stored as UTF-8 bytes followed by a terminating zero byte.
    /// </summary>
    /// <param name="key">Key of the entry.</param>
    /// <param name="value">Pointer to the entry's value.</param>
    /// <param name="from">Value stored in <see cref="From"/>.</param>
    /// <param name="index">Value stored in <see cref="Index"/>; defaults to 0.</param>
    /// <param name="len">Value stored in <see cref="Len"/>; defaults to 0.</param>
    public AIKIT_CustomData(string key, IntPtr value, int from, int index = 0, int len = 0)
    {
        // No following node and nothing in the reserved slot.
        this.Next = IntPtr.Zero;
        this.Reserved = IntPtr.Zero;

        // Append the terminator before encoding so the byte array ends with a zero byte.
        string terminatedKey = string.Concat(key, "\0");
        this.KeyBytes = Encoding.UTF8.GetBytes(terminatedKey);

        this.Value = value;
        this.Index = index;
        this.Len = len;
        this.From = from;
    }
}
/// <summary>
/// Managed mirror of the session handle. It is read from the pointer written by
/// <c>AIKIT_Start</c> and passed to <c>AIKIT_Write</c> and <c>AIKIT_End</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_HANDLE
{
public IntPtr usrContext; // User context pointer
public string abilityID; // Ability id; logged by the output callback
public IntPtr handleID; // NOTE(review): presumably the native session identifier — confirm
}
/// <summary>
/// Managed mirror of a builder handle. It is read from the pointer returned by
/// <c>AIKITBuilder_Create</c> and passed to the other <c>AIKITBuilder_*</c> calls.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKITBuilderHandle
{
public IntPtr builderInst; // NOTE(review): presumably the native builder instance — confirm
public BuilderType type; // Kind of builder (parameter or data)
}
/// <summary>
/// Kind of builder requested from <c>AIKITBuilder_Create</c>.
/// </summary>
public enum BuilderType
{
BUILDER_TYPE_PARAM, // Used when building the session parameters
BUILDER_TYPE_DATA // Used when building the input data
}
/// <summary>
/// Kind of payload carried by a <c>BuilderData</c> segment.
/// </summary>
public enum BuilderDataType
{
DATA_TYPE_TEXT, // Text
DATA_TYPE_AUDIO, // Audio
DATA_TYPE_IMAGE, // Image
DATA_TYPE_VIDEO // Video
}
/// <summary>
/// Managed mirror of a native input-data node, laid out sequentially. It is read from the pointer
/// returned by <c>AIKITBuilder_BuildData</c> and passed to <c>AIKIT_Write</c>.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_BaseData
{
public IntPtr next; // Linked-list pointer; IntPtr stands in for void*
public AIKIT_BaseParam desc; // Declared as AIKIT_BaseParam; NOTE(review): confirm this matches the native field
public string key;
public IntPtr value; // Pointer; IntPtr stands in for void*
public IntPtr reserved; // Reserved field; IntPtr stands in for void*
public int len; // Data length
public int type; // Variable type
public int status; // NOTE(review): presumably the data status (compare BuilderData.status) — confirm
public int from; // NOTE(review): presumably the data source — confirm
}
/// <summary>
/// One data segment handed to <c>AIKITBuilder_AddBuf</c>, laid out sequentially.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class BuilderData
{
public int type; // Data type
public string name; // Segment name
public byte[] data; // Segment payload (when sending a path, pass the pointer to the path string here)
public int len; // Segment length (pass 0 when sending a path or a file handle)
public int status; // Segment status; see the AIKIT_DataStatus enum
}
// Callback delegate types handed to the native SDK through AIKIT_Callbacks.
// All three declare the same unmanaged calling convention, so every function pointer in the
// callback struct is marshalled the same way.

/// <summary>Output callback; receives a handle pointer and the output data list.</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void AIKIT_OnOutput(IntPtr handle, AIKIT_BaseDataList output);

/// <summary>Event callback; receives a handle pointer, an event type and an event value pointer.</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void AIKIT_OnEventDelegate(IntPtr handle, int eventType, IntPtr eventValue);

/// <summary>Error callback; receives a handle pointer, an error code and a description pointer.</summary>
[UnmanagedFunctionPointer(CallingConvention.Cdecl)]
public delegate void AIKIT_OnErrorDelegate(IntPtr handle, int err, IntPtr desc);
/// <summary>
/// C# counterpart of the C++ callback structure: the three callbacks passed to
/// <c>AIKIT_RegisterAbilityCallback</c>, in sequential layout.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public struct AIKIT_Callbacks
{
public AIKIT_OnOutput outputCB; // Output callback
public AIKIT_OnEventDelegate eventCB; // Event callback
public AIKIT_OnErrorDelegate errorCB; // Error callback
}
/// <summary>
/// Output data list received by the output callback, laid out sequentially.
/// </summary>
[StructLayout(LayoutKind.Sequential)]
public class AIKIT_BaseDataList
{
public IntPtr node; // Kept as a raw pointer; NOTE(review): presumably points to the first AIKIT_BaseData node — confirm
//public AIKIT_BaseData node;
public int count; // NOTE(review): presumably the number of nodes — confirm
public int totalLen; // NOTE(review): presumably the total data length — confirm
}
- 条件
- 三个库文件
- 一个唤醒词文件
先将官方的sdk中的三个库文件导入到unity中(项目assets文件夹中创建一个bin文件夹/放在plugins中),两个dll,一个lib
然后在项目中的某个路径下添加唤醒词文件(txt)
记得唤醒词文件的路径,在代码中需要获取那个文件
然后就可以直接运行测试了
我本地跑了很多次都是成功的,有问题可以留言
待解决的问题
- 唤醒成功后的回调对象的转换还有问题
- 需要把初始化后不必每次都调用的部分,与每次都需要调用的函数分离开
- 代码整理,我将所有的逻辑都放在了一个类中,很不雅观
这次的代码之旅让我对指针和c++有了深刻的理解,在这几天的不断尝试中,我几度想放弃,网上关于sdk调用的知识很少,客服也是一问三不知,答非所问。不过还是坚持了下来…我可真是太棒了。
下班了就先写到这里了,有时间再把具体的实现流程补充一下吧
不过如果完整的看一遍下来,应该没什么问题,主要就是时间花在了读官方提供的demo源码以及自己根据源码中提供的api去模拟demo中的一些功能。
大概的情况就是,官方的demo源码中使用了很多工厂模式去新建对象,而在提供的dll中是没有这方面的函数的,而且官方的文档也完全没有写如何不用工厂模式去新建对象。因此需要自己根据其他的函数,去模拟出工厂模式创建对象的过程。
当然看网上也有很多关于自己打dll包,将工厂模式之类没开放出来的函数开放出来,然后去调用那个自己打的dll包的说法,不过感觉目前我的能力还没有到那个水平就没有去尝试,有时间的话看看能不能摸索一下吧~