import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from audio_utils import BaseRecorder
from utils.stt.modified_funasr import ModifiedRecognizer


def asr_file_stream(file_path=r'.\assets\example_recording.wav'):
    # Load the audio file
    rec = BaseRecorder()
    data = rec.load_audio_file(file_path)

    # Create the recognizer with punctuation, emotion and speaker verification enabled
    asr = ModifiedRecognizer(use_punct=True, use_emotion=True, use_speaker_ver=True)
    asr.session_signup("test")

    # Register the target speaker for speaker verification
    asr.initialize_speaker(r".\assets\example_recording.wav")

    # Speech recognition
    print("===============================================")
    text_dict = asr.streaming_recognize("test", data, auto_det_end=True)
    print(f"text_dict: {text_dict}")

    if not isinstance(text_dict, str):
        print("".join(text_dict['text']))

    # Emotion recognition
    print("===============================================")
    emotion_dict = asr.recognize_emotion(data)
    print(f"emotion_dict: {emotion_dict}")
    if not isinstance(emotion_dict, str):
        # Pick the label with the highest score
        max_index = emotion_dict['scores'].index(max(emotion_dict['scores']))
        print("emotion: " + emotion_dict['labels'][max_index])


if __name__ == "__main__":
    asr_file_stream()
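    # Usage note: asr_file_stream() also accepts an explicit path to another
    # recording; the file name below is hypothetical, e.g.:
    # asr_file_stream(r".\assets\another_recording.wav")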