import os
import sys

# Make the parent of this file's directory importable so the project-local
# modules below can be resolved when this script is run directly.
sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from audio_utils import BaseRecorder
from utils.stt.modified_funasr import ModifiedRecognizer


def asr_file_stream(file_path=r'.\assets\example_recording.wav'):
    """Run streaming speech recognition and emotion recognition on one audio file.

    The audio is loaded through ``BaseRecorder.load_audio_file``, then passed
    to a ``ModifiedRecognizer`` built with punctuation, emotion and speaker
    verification enabled. Results are printed to stdout; nothing is returned.

    Args:
        file_path: Path of the audio file to recognise. Defaults to the
            bundled example recording (relative to the working directory).

    Note:
        The target speaker is always initialised from the bundled example
        recording, independently of ``file_path``.
        NOTE(review): both recognizer calls appear to return either a dict or
        a plain string; presumably the string form is a status/error message
        - confirm against ``ModifiedRecognizer``.
    """
    # Load the audio file.
    audio = BaseRecorder().load_audio_file(file_path)

    # Build the model and sign up the session used for streaming below.
    recognizer = ModifiedRecognizer(
        use_punct=True,
        use_emotion=True,
        use_speaker_ver=True,
    )
    recognizer.session_signup("test")

    # Register the target speaker.
    recognizer.initialize_speaker(r".\assets\example_recording.wav")

    # Speech recognition.
    print("===============================================")
    recognition = recognizer.streaming_recognize("test", audio, auto_det_end=True)
    print(f"text_dict: {recognition}")
    if not isinstance(recognition, str):
        # The 'text' entry is joined into a single string for display.
        print("".join(recognition['text']))

    # Emotion recognition.
    print("===============================================")
    emotion = recognizer.recognize_emotion(audio)
    print(f"emotion_dict: {emotion}")
    if not isinstance(emotion, str):
        # Report the label whose score is the highest.
        scores = emotion['scores']
        best_index = scores.index(max(scores))
        print("emotion: " + emotion['labels'][best_index])


asr_file_stream()