import os
import sys

sys.path.append(os.path.dirname(os.path.dirname(__file__)))

from audio_utils import BaseRecorder
from utils.stt.modified_funasr import ModifiedRecognizer


def asr_file_stream(file_path=r'.\assets\example_recording.wav'):
    # Load the audio file
    rec = BaseRecorder()
    data = rec.load_audio_file(file_path)

    # Create the recognizer with punctuation, emotion and speaker verification enabled
    asr = ModifiedRecognizer(use_punct=True, use_emotion=True, use_speaker_ver=True)
    asr.session_signup("test")

    # Register the target speaker for speaker verification
    asr.initialize_speaker(r".\assets\example_recording.wav")

    # Speech recognition
    print("===============================================")
    text_dict = asr.streaming_recognize("test", data, auto_det_end=True)
    print(f"text_dict: {text_dict}")

    if not isinstance(text_dict, str):
        print("".join(text_dict['text']))

    # Emotion recognition
    print("===============================================")
    emotion_dict = asr.recognize_emotion(data)
    print(f"emotion_dict: {emotion_dict}")
    if not isinstance(emotion_dict, str):
        # Pick the label with the highest score
        max_index = emotion_dict['scores'].index(max(emotion_dict['scores']))
        print("emotion: " + emotion_dict['labels'][max_index])


if __name__ == "__main__":
    asr_file_stream()
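    # Usage note: asr_file_stream() also accepts an explicit path to another
    # recording; the file name below is hypothetical, e.g.:
    # asr_file_stream(r".\assets\another_recording.wav")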