1
0
Fork 0
TakwayPlatform/utils/audio_utils.py

17 lines
637 B
Python
Raw Normal View History

2024-05-01 17:18:30 +08:00
import base64
import logging

import webrtcvad
class VAD():
    """Voice-activity detector wrapping ``webrtcvad.Vad`` for base64 audio frames.

    The instance keeps the sample rate and a few tuning values next to the
    underlying detector so callers can read them back from one object.
    """

    def __init__(self, vad_sensitivity=1, frame_duration=30, vad_buffer_size=7, min_act_time=1, RATE=16000, **kwargs):
        """Create the detector and record its configuration.

        Args:
            vad_sensitivity: Passed straight to ``webrtcvad.Vad`` as its mode
                (the py-webrtcvad docs describe it as an aggressiveness
                level from 0 to 3).
            frame_duration: Length of one audio frame in milliseconds; used
                here only to derive ``vad_chunk_size``.
            vad_buffer_size: Stored as-is; not used by the code in this class.
            min_act_time: Minimum activity time, in seconds. Stored as-is;
                not used by the code in this class.
            RATE: Sample rate in Hz handed to the detector on every call.
            **kwargs: Accepted and ignored, so callers may pass a larger
                configuration dict without filtering it first.
        """
        self.RATE = RATE
        self.vad = webrtcvad.Vad(vad_sensitivity)
        self.vad_buffer_size = vad_buffer_size
        # Samples per frame: samples/second * milliseconds / 1000.
        self.vad_chunk_size = int(self.RATE * frame_duration / 1000)
        self.min_act_time = min_act_time  # minimum activity time, in seconds

    def is_speech(self, data):
        """Return whether a base64-encoded audio frame is classified as speech.

        Args:
            data: Base64 text (or bytes) of one raw audio frame.
                NOTE(review): py-webrtcvad documents 16-bit mono PCM frames
                of 10, 20 or 30 ms -- confirm the callers send that.

        Returns:
            The detector's verdict, or ``False`` when the frame cannot be
            decoded or the detector rejects it. This method never raises
            for ordinary errors.
        """
        try:
            byte_data = base64.b64decode(data)
            return self.vad.is_speech(byte_data, self.RATE)
        except Exception as exc:
            # Deliberately best-effort: a bad frame counts as "no speech".
            # Log the reason so malformed input is distinguishable from
            # real silence when debugging, instead of vanishing silently.
            logging.getLogger(__name__).debug(
                "VAD.is_speech treated frame as non-speech: %r", exc
            )
            return False