157 lines
4.8 KiB
C#
157 lines
4.8 KiB
C#
using System;
|
|
using System.Collections.Generic;
|
|
using System.Linq;
|
|
using System.Text;
|
|
using System.Threading.Tasks;
|
|
|
|
namespace AliFsmnVadSharp
|
|
{
|
|
/// <summary>
/// Raw speech/silence classification of a single audio frame.
/// </summary>
public enum FrameState
{
    /// <summary>The frame could not be classified as speech or silence.</summary>
    kFrameStateInvalid = -1,

    /// <summary>The frame is silence; WindowDetector counts it as 0 in its window.</summary>
    kFrameStateSil = 0,

    /// <summary>The frame is speech; WindowDetector counts it as 1 in its window.</summary>
    kFrameStateSpeech = 1
}
|
|
|
|
/// <summary>
/// Final voiced/unvoiced decision for a frame, expressed as the transition from the
/// previous state to the current one.
/// </summary>
public enum AudioChangeState
{
    /// <summary>The state was speech and remains speech.</summary>
    kChangeStateSpeech2Speech = 0,

    /// <summary>The state switched from speech to silence on this frame.</summary>
    kChangeStateSpeech2Sil = 1,

    /// <summary>The state was silence and remains silence.</summary>
    kChangeStateSil2Sil = 2,

    /// <summary>The state switched from silence to speech on this frame.</summary>
    kChangeStateSil2Speech = 3,

    /// <summary>
    /// Not produced by WindowDetector in this file; presumably marks "no speech has
    /// begun yet" for callers. TODO confirm against the code that uses it.
    /// </summary>
    kChangeStateNoBegin = 4,

    /// <summary>No valid transition could be determined, e.g. the frame state was invalid.</summary>
    kChangeStateInvalid = 5
}
|
|
|
|
|
|
/// <summary>
/// Smooths per-frame speech/silence decisions over a circular window of the most
/// recent frames and reports how the smoothed state changes from frame to frame.
/// </summary>
internal class WindowDetector
{
    // Configuration exactly as passed to the constructor.
    private readonly int _window_size_ms = 0;
    private readonly int _sil_to_speech_time = 0;
    private readonly int _speech_to_sil_time = 0;
    private readonly int _frame_size_ms = 0;

    // Window length in frames: window_size_ms / frame_size_ms (integer division).
    private readonly int _win_size_frame = 0;

    // Running sum of _win_state, i.e. how many frames in the window are speech.
    private int _win_sum = 0;

    // Circular buffer holding 1 (speech) or 0 (silence) per frame; initialized to all silence.
    private readonly int[] _win_state = new int[0];

    // Slot of _win_state that the next frame overwrites.
    private int _cur_win_pos = 0;

    // Smoothed state left behind by the previous DetectOneFrame call; starts as silence.
    private int _pre_frame_state = (int)FrameState.kFrameStateSil;

    // 1/0 value of the frame most recently passed to DetectOneFrame.
    private int _cur_frame_state = (int)FrameState.kFrameStateSil;

    // Silence -> speech once the window holds at least this many speech frames
    // (sil_to_speech_time / frame_size_ms).
    private readonly int _sil_to_speech_frmcnt_thres = 0;

    // Speech -> silence once the window holds at most this many speech frames
    // (speech_to_sil_time / frame_size_ms).
    private readonly int _speech_to_sil_frmcnt_thres = 0;

    // Zeroed by the constructor and by Reset(); nothing in this class reads them.
    private int _voice_last_frame_count = 0;
    private int _noise_last_frame_count = 0;
    private int _hydre_frame_count = 0;

    /// <summary>
    /// Creates a detector with a zero-length window. Such a detector cannot smooth
    /// anything: <see cref="DetectOneFrame"/> returns
    /// <see cref="AudioChangeState.kChangeStateInvalid"/> for every frame.
    /// </summary>
    public WindowDetector()
    {
    }

    /// <summary>
    /// Creates a detector whose window spans <paramref name="window_size_ms"/> worth of frames.
    /// </summary>
    /// <param name="window_size_ms">Length of the smoothing window; must not be negative.</param>
    /// <param name="sil_to_speech_time">Divided by the frame size to get the speech-frame count that triggers silence -> speech.</param>
    /// <param name="speech_to_sil_time">Divided by the frame size to get the speech-frame count at or below which speech -> silence is triggered.</param>
    /// <param name="frame_size_ms">Duration of one frame; must be positive.</param>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="frame_size_ms"/> is not positive, or <paramref name="window_size_ms"/> is negative.
    /// </exception>
    public WindowDetector(int window_size_ms, int sil_to_speech_time, int speech_to_sil_time, int frame_size_ms)
    {
        // Fail with a descriptive error instead of a DivideByZeroException below.
        if (frame_size_ms <= 0)
        {
            throw new ArgumentOutOfRangeException(nameof(frame_size_ms), frame_size_ms, "Frame size must be a positive number of milliseconds.");
        }

        // A negative window would otherwise surface as an OverflowException from the array allocation.
        if (window_size_ms < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(window_size_ms), window_size_ms, "Window size must not be negative.");
        }

        _window_size_ms = window_size_ms;
        _sil_to_speech_time = sil_to_speech_time;
        _speech_to_sil_time = speech_to_sil_time;
        _frame_size_ms = frame_size_ms;

        _win_size_frame = window_size_ms / frame_size_ms;
        _win_sum = 0;
        _win_state = new int[_win_size_frame]; // initialize the window (all silence)

        _cur_win_pos = 0;
        _pre_frame_state = (int)FrameState.kFrameStateSil;
        _cur_frame_state = (int)FrameState.kFrameStateSil;
        _sil_to_speech_frmcnt_thres = sil_to_speech_time / frame_size_ms;
        _speech_to_sil_frmcnt_thres = speech_to_sil_time / frame_size_ms;

        _voice_last_frame_count = 0;
        _noise_last_frame_count = 0;
        _hydre_frame_count = 0;
    }

    /// <summary>
    /// Returns the detector to its initial state: empty (all-silence) window and
    /// silence as the previous state. The configuration is kept.
    /// </summary>
    public void Reset()
    {
        _cur_win_pos = 0;
        _win_sum = 0;
        // Clear in place; the buffer is never exposed, so this equals allocating a fresh one.
        Array.Clear(_win_state, 0, _win_state.Length);
        _pre_frame_state = (int)FrameState.kFrameStateSil;
        _cur_frame_state = (int)FrameState.kFrameStateSil;
        _voice_last_frame_count = 0;
        _noise_last_frame_count = 0;
        _hydre_frame_count = 0;
    }

    /// <summary>Returns the window length in frames.</summary>
    public int GetWinSize()
    {
        return _win_size_frame;
    }

    /// <summary>
    /// Feeds one frame into the window and reports the resulting state transition.
    /// </summary>
    /// <param name="frameState">Raw classification of the frame.</param>
    /// <param name="frame_count">Not used by this implementation; kept so existing callers compile unchanged.</param>
    /// <returns>
    /// The transition of the smoothed state, or
    /// <see cref="AudioChangeState.kChangeStateInvalid"/> when <paramref name="frameState"/>
    /// is neither speech nor silence or when the window has zero length.
    /// </returns>
    public AudioChangeState DetectOneFrame(FrameState frameState, int frame_count)
    {
        switch (frameState)
        {
            case FrameState.kFrameStateSpeech:
                _cur_frame_state = 1;
                break;
            case FrameState.kFrameStateSil:
                _cur_frame_state = 0;
                break;
            default:
                // Unclassifiable frame: leave the window untouched.
                _cur_frame_state = (int)FrameState.kFrameStateSil;
                return AudioChangeState.kChangeStateInvalid;
        }

        // A zero-length window has no slot to write to and no modulus to wrap with;
        // report "invalid" instead of crashing on the index / remainder below.
        if (_win_state.Length == 0)
        {
            return AudioChangeState.kChangeStateInvalid;
        }

        // Replace the oldest entry with the new frame and keep the running sum in step.
        _win_sum += _cur_frame_state - _win_state[_cur_win_pos];
        _win_state[_cur_win_pos] = _cur_frame_state;
        _cur_win_pos = (_cur_win_pos + 1) % _win_size_frame;

        if (_pre_frame_state == (int)FrameState.kFrameStateSil)
        {
            if (_win_sum >= _sil_to_speech_frmcnt_thres)
            {
                _pre_frame_state = (int)FrameState.kFrameStateSpeech;
                return AudioChangeState.kChangeStateSil2Speech;
            }

            return AudioChangeState.kChangeStateSil2Sil;
        }

        if (_pre_frame_state == (int)FrameState.kFrameStateSpeech)
        {
            if (_win_sum <= _speech_to_sil_frmcnt_thres)
            {
                _pre_frame_state = (int)FrameState.kFrameStateSil;
                return AudioChangeState.kChangeStateSpeech2Sil;
            }

            return AudioChangeState.kChangeStateSpeech2Speech;
        }

        // Only reachable if the previous state is neither silence nor speech.
        return AudioChangeState.kChangeStateInvalid;
    }

    /// <summary>Returns the frame duration given to the constructor.</summary>
    private int FrameSizeMs()
    {
        return _frame_size_ms;
    }
}
|
|
}
|