1.首先前往 https://ai.baidu.com 百度语音官网注册账号并且申请成为开发者。
2.创建应用,并且填写相应的应用描述
3.获取 appId,apiKey,secretKey
1.新建一个百度语音的基类,命名 SpeechBase(代码如下)
using UnityEngine;

namespace BaiduSpeech
{
    /// <summary>
    /// Base component for the Baidu speech features. It stores the application
    /// credentials and forwards Unity's lifecycle messages to overridable hooks.
    /// </summary>
    public abstract class SpeechBase : MonoBehaviour
    {
        //---------------------------------------- Credentials ----------------------------------------

        /// <summary>The appId of the Baidu application.</summary>
        public string appId { get; set; }

        /// <summary>The apiKey of the Baidu application.</summary>
        public string apiKey { get; set; }

        /// <summary>The secretKey of the Baidu application.</summary>
        public string secretKey { get; set; }

        //---------------------------------------- Overridable hooks ----------------------------------------

        /// <summary>Hook invoked first from Unity's <c>Awake</c> message.</summary>
        public virtual void OnAwake()
        {
        }

        /// <summary>Platform initialisation hook, invoked from <c>Awake</c> right after <see cref="OnAwake"/>.</summary>
        public virtual void OnInitPlatform()
        {
        }

        /// <summary>Hook invoked from Unity's <c>Start</c> message.</summary>
        public virtual void OnStart()
        {
        }

        /// <summary>Hook invoked from Unity's <c>Update</c> message.</summary>
        public virtual void OnUpdate()
        {
        }

        /// <summary>Release hook, invoked from Unity's <c>OnDestroy</c> message.</summary>
        public virtual void OnDispose()
        {
        }

        //---------------------------------------- Unity messages ----------------------------------------

        private void Awake()
        {
            // Order matters: the generic hook runs before the platform hook.
            OnAwake();
            OnInitPlatform();
        }

        private void Start()
        {
            OnStart();
        }

        private void Update()
        {
            OnUpdate();
        }

        private void OnDestroy()
        {
            OnDispose();
        }
    }
}
// Step 2: create a speech-to-text base class named AsrBase (code below).
namespace BaiduSpeech
{
    /// <summary>
    /// Base class for speech-to-text implementations. Every operation is an empty
    /// virtual method, so subclasses override only what they support.
    /// </summary>
    public abstract class AsrBase : SpeechBase
    {
        /// <summary>Initialises speech recognition.</summary>
        public virtual void AsrInit()
        {
        }

        /// <summary>Starts recording.</summary>
        /// <param name="json">JSON text handed to the implementation; its schema is not defined by this class.</param>
        public virtual void VoiceStart(string json)
        {
        }

        /// <summary>Stops recording.</summary>
        public virtual void VoiceStop()
        {
        }

        /// <summary>
        /// Cancels the current recognition. After cancelling, recognition stops
        /// immediately and no result is returned.
        /// </summary>
        public virtual void VoiceCancel()
        {
        }
    }
}
// Step 3: create a JSON-parsing class named WebAsrParams (code below).
/// <summary>
/// Data holder that the recognition response JSON is deserialised into.
/// Field names mirror the JSON keys, so they must not be renamed.
/// </summary>
/// <remarks>
/// The attribute is written fully qualified so that this type compiles on its own,
/// without relying on a <c>using System;</c> directive elsewhere in the file.
/// </remarks>
[System.Serializable]
public class WebAsrParams
{
    /// <summary>Error code (<c>err_no</c>); presumably 0 on success - confirm against the service documentation.</summary>
    public int err_no;

    /// <summary>Error description (<c>err_msg</c>).</summary>
    public string err_msg;

    /// <summary>Value of the <c>corpus_no</c> key of the response.</summary>
    public long corpus_no;

    /// <summary>Value of the <c>sn</c> key of the response.</summary>
    public string sn;

    /// <summary>Recognised text candidates (<c>result</c>).</summary>
    public string[] result;
}
// Step 4: create a class named AsrForWeb (code below).
using System;
using System.Collections;
using System.Text.RegularExpressions;
using UnityEngine;
using UnityEngine.Networking;

namespace BaiduSpeech
{
    /// <summary>
    /// Speech-to-text through the web interface: records from the first microphone,
    /// converts the recording to 16-bit PCM and posts it to the recognition endpoint.
    /// </summary>
    public class AsrForWeb : AsrBase
    {
        /// <summary>Token endpoint, including the fixed grant type.</summary>
        private const string ACCESS_TOKEN_API_URL = "https://openapi.baidu.com/oauth/2.0/token?grant_type=client_credentials";

        /// <summary>Recognition endpoint.</summary>
        private const string ASR_API_URL = "https://vop.baidu.com/server_api";

        /// <summary>Ticks of 1970-01-01T00:00:00Z, subtracted to obtain a Unix-style timestamp.</summary>
        private const long UNIX_EPOCH_TICKS = 621355968000000000L;

        /// <summary>
        /// Extracts the value of the "access_token" member from the token response.
        /// Does not require a trailing comma, so it also matches when the member is last.
        /// </summary>
        private static readonly Regex AccessTokenPattern = new Regex(@"""access_token""\s*:\s*""([^""]+)""");

        /// <summary>Cached access token; empty until one has been fetched successfully.</summary>
        private string accessToken = string.Empty;

        /// <summary>True when at least one microphone was found by <see cref="OnInitPlatform"/>.</summary>
        private bool isHaveMic = false;

        /// <summary>Name of the recording device in use.</summary>
        private string currentDeviceName = string.Empty;

        /// <summary>Recording sample rate in Hz (8000 or 16000).</summary>
        private int recordFrequency = 8000;

        /// <summary>Millisecond timestamp taken when recording started.</summary>
        private double lastPressTimestamp = 0;

        /// <summary>Maximum recording length in seconds.</summary>
        private int recordMaxLength = 10;

        /// <summary>Length of the last recording in whole seconds (rounded up).</summary>
        private int trueLength = 0;

        /// <summary>Whether the microphone clip loops when it reaches its end.</summary>
        private bool isLoop = false;

        /// <summary>Clip returned by the last call to <c>Microphone.Start</c>.</summary>
        private AudioClip saveAudioClip;

        /// <summary>Detects whether a microphone exists and selects the first one.</summary>
        public override void OnInitPlatform()
        {
            // Read the property once; each access builds a new array.
            string[] devices = Microphone.devices;
            if (devices != null && devices.Length > 0)
            {
                isHaveMic = true;
                currentDeviceName = devices[0];
            }
        }

        /// <summary>
        /// Checks that recognition can be used: a microphone exists and it is not
        /// already recording. Logs a warning naming the actual cause otherwise.
        /// </summary>
        public override void AsrInit()
        {
            if (!isHaveMic)
            {
                Debug.LogWarning(GetType() + "/AsrInit()/当前设备没有麦克风!");
                return;
            }

            if (Microphone.IsRecording(currentDeviceName))
            {
                Debug.LogWarning(GetType() + "/AsrInit()/麦克风正在录音中!");
            }
        }

        /// <summary>Starts recording. Does nothing without a microphone or while already recording.</summary>
        /// <param name="json">
        /// Not read by this implementation. For the parameter format see
        /// https://ai.baidu.com/ai-doc/SPEECH/9k38lxfnk
        /// </param>
        public override void VoiceStart(string json)
        {
            if (!isHaveMic || Microphone.IsRecording(currentDeviceName))
            {
                return;
            }

            lastPressTimestamp = GetTimestampOfNowWithMillisecond();
            saveAudioClip = Microphone.Start(currentDeviceName, isLoop, recordMaxLength, recordFrequency);
        }

        /// <summary>
        /// Cancels the current recognition: recording stops immediately and no
        /// recognition request is sent.
        /// </summary>
        public override void VoiceCancel()
        {
            if (!isHaveMic || !Microphone.IsRecording(currentDeviceName))
            {
                return;
            }

            Microphone.End(currentDeviceName);
        }

        /// <summary>
        /// Stops recording and, when the recording is longer than one second,
        /// starts the recognition request.
        /// </summary>
        public override void VoiceStop()
        {
            if (!isHaveMic || !Microphone.IsRecording(currentDeviceName))
            {
                return;
            }

            Microphone.End(currentDeviceName);

            double elapsedMilliseconds = GetTimestampOfNowWithMillisecond() - lastPressTimestamp;
            int elapsedSeconds = Mathf.CeilToInt((float)elapsedMilliseconds / 1000f);

            // The clip never holds more than recordMaxLength seconds, so never ask for more.
            trueLength = Mathf.Min(elapsedSeconds, recordMaxLength);

            if (trueLength > 1)
            {
                StartCoroutine(StartAsr());
            }
            else
            {
                Debug.LogWarning(GetType() + "/VoiceStop()/录音时长过短!");
            }
        }

        /// <summary>Returns the current UTC time as whole milliseconds since 1970-01-01, used to time the recording.</summary>
        private double GetTimestampOfNowWithMillisecond()
        {
            return (DateTime.UtcNow.Ticks - UNIX_EPOCH_TICKS) / TimeSpan.TicksPerMillisecond;
        }

        /// <summary>
        /// Requests an access token with <c>apiKey</c>/<c>secretKey</c> and stores it in
        /// <see cref="accessToken"/>. On any failure a warning is logged and the token is left unchanged.
        /// </summary>
        private IEnumerator GetAccessToken()
        {
            string uri = string.Format("{0}&client_id={1}&client_secret={2}", ACCESS_TOKEN_API_URL, apiKey, secretKey);

            // Dispose the request so its native resources are released.
            using (UnityWebRequest request = UnityWebRequest.Get(uri))
            {
                yield return request.SendWebRequest();

                if (!string.IsNullOrEmpty(request.error))
                {
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!");
                    yield break;
                }

                Match match = AccessTokenPattern.Match(request.downloadHandler.text);
                if (match.Success)
                {
                    accessToken = match.Groups[1].Value;
                }
                else
                {
                    Debug.LogWarning(GetType() + "/GetAccessToken()/验证错误,获取AccessToken失败!");
                }
            }
        }

        /// <summary>
        /// Sends the last recording to the recognition endpoint and logs the response.
        /// Fetches an access token first when none is cached.
        /// </summary>
        private IEnumerator StartAsr()
        {
            if (string.IsNullOrEmpty(accessToken))
            {
                yield return GetAccessToken();

                if (string.IsNullOrEmpty(accessToken))
                {
                    // GetAccessToken has already logged the reason; a request without a token cannot succeed.
                    yield break;
                }
            }

            if (saveAudioClip == null)
            {
                Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                yield break;
            }

            byte[] pcmBytes = ConvertToPcm16(saveAudioClip, recordFrequency * trueLength);

            string url = string.Format("{0}?cuid={1}&token={2}", ASR_API_URL, SystemInfo.deviceUniqueIdentifier, accessToken);

            // The body must be the bare PCM bytes to match the audio/pcm Content-Type.
            // A WWWForm would wrap them in a multipart envelope and corrupt the audio stream.
            using (UnityWebRequest request = new UnityWebRequest(url, UnityWebRequest.kHttpVerbPOST))
            {
                request.uploadHandler = new UploadHandlerRaw(pcmBytes);
                request.downloadHandler = new DownloadHandlerBuffer();
                request.SetRequestHeader("Content-Type", "audio/pcm;rate=" + recordFrequency);

                yield return request.SendWebRequest();

                if (!string.IsNullOrEmpty(request.error))
                {
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!");
                    yield break;
                }

                string asrResult = request.downloadHandler.text;
                Debug.Log(asrResult);

                WebAsrParams webAsrParams = JsonUtility.FromJson<WebAsrParams>(asrResult);

                // NOTE(review): a non-zero err_no is treated as a failed recognition - confirm against the service documentation.
                if (webAsrParams == null || webAsrParams.err_no != 0)
                {
                    string detail = webAsrParams == null
                        ? string.Empty
                        : " err_no=" + webAsrParams.err_no + " err_msg=" + webAsrParams.err_msg;
                    Debug.LogWarning(GetType() + "/StartAsr()/语音识别失败!" + detail);
                }
            }
        }

        /// <summary>Reads samples from the start of <paramref name="clip"/> and converts them to 16-bit PCM bytes.</summary>
        /// <param name="clip">Recorded clip to read from.</param>
        /// <param name="samplesPerChannel">Number of samples to read for each channel.</param>
        /// <returns>The samples as consecutive 16-bit values in the platform's byte order.</returns>
        private static byte[] ConvertToPcm16(AudioClip clip, int samplesPerChannel)
        {
            float[] samples = new float[samplesPerChannel * clip.channels];
            clip.GetData(samples, 0);

            short[] pcm = new short[samples.Length];
            for (int index = 0; index < samples.Length; index++)
            {
                // Clamp first so an out-of-range sample cannot overflow the short conversion.
                pcm[index] = (short)(Mathf.Clamp(samples[index], -1f, 1f) * short.MaxValue);
            }

            byte[] bytes = new byte[pcm.Length * sizeof(short)];
            Buffer.BlockCopy(pcm, 0, bytes, 0, bytes.Length);
            return bytes;
        }
    }
}
Unity源码:https://github.com/yongliangchen/BaiduSpeechForUnity.git
Android源码:https://github.com/yongliangchen/BaiduSpeechForAndroid.git
QQ交流群:947618353
