TakwayPlatform/utils/audio_utils.py

14 lines
560 B
Python

import webrtcvad
import base64
class VAD:
    """Wrap a ``webrtcvad.Vad`` detector so it can be fed base64-encoded audio."""

    def __init__(self, vad_sensitivity=1, frame_duration=30, vad_buffer_size=7, min_act_time=1, RATE=16000, **kwargs):
        """Build the underlying detector and record the framing parameters.

        Args:
            vad_sensitivity: Forwarded unchanged to ``webrtcvad.Vad``.
                NOTE(review): webrtcvad documents this as an aggressiveness
                mode in the range 0-3 -- confirm against the installed version.
            frame_duration: Frame length used to derive ``vad_chunk_size``.
                It is divided by 1000 below, so presumably milliseconds.
            vad_buffer_size: Stored on the instance; not used inside this class.
            min_act_time: Minimum activity time, in seconds; stored on the
                instance and not used inside this class.
            RATE: Sample rate stored on the instance and passed to the
                detector on every ``is_speech`` call.
            **kwargs: Accepted and ignored.
        """
        detector = webrtcvad.Vad(vad_sensitivity)
        # Samples per frame: rate (samples/s) * duration / 1000, truncated.
        samples_per_frame = int(RATE * frame_duration / 1000)

        self.RATE = RATE
        self.vad = detector
        self.vad_chunk_size = samples_per_frame
        self.vad_buffer_size = vad_buffer_size
        self.min_act_time = min_act_time  # minimum activity time, in seconds

    def is_speech(self, data):
        """Decode a base64 payload and ask the detector whether it is speech.

        Args:
            data: Base64-encoded audio frame (any input that
                ``base64.b64decode`` accepts).
                NOTE(review): webrtcvad expects 16-bit mono PCM frames of
                10, 20 or 30 ms -- verify that callers send such frames.

        Returns:
            The result of ``self.vad.is_speech`` for the decoded bytes at
            ``self.RATE``.
        """
        pcm_frame = base64.b64decode(data)
        return self.vad.is_speech(pcm_frame, self.RATE)