# pun_emo_speaker_utils/demo.py
#
# 29 lines
# 1.0 KiB
# Python

from takway.audio_utils import BaseRecorder
from takway.stt.funasr_utils import FunAutoSpeechRecognizer
from takway.stt.modified_funasr import ModifiedRecognizer
def asr_file_stream(file_path=r'.\examples\example_recording.wav', *,
                    speaker_file_path=r'.\examples\example_recording.wav'):
    """Run the recognizer demo on one audio file and print the results.

    Loads ``file_path`` through ``BaseRecorder.load_audio_file``, builds a
    ``ModifiedRecognizer`` with ``use_punct``, ``use_emotion`` and
    ``use_speaker_ver`` enabled, calls ``initialize_speaker`` with
    ``speaker_file_path``, then prints the output of ``streaming_recognize``
    followed by the output of ``recognize_emotion`` for the same audio.

    Args:
        file_path: Path of the audio file to recognize.
        speaker_file_path: Path handed to ``initialize_speaker``. Defaults to
            the example recording that this demo previously hard-coded, so
            existing calls behave exactly as before.

    Returns:
        None. Everything is reported on stdout.
    """
    separator = "==============================================="

    recorder = BaseRecorder()
    audio = recorder.load_audio_file(file_path)

    asr = ModifiedRecognizer(use_punct=True, use_emotion=True, use_speaker_ver=True)
    asr.initialize_speaker(speaker_file_path)

    # Speech recognition over the whole file.
    text_dict = asr.streaming_recognize(audio, auto_det_end=True)
    print(separator)
    print(f"text_dict: {text_dict}")
    # A str result has no 'text' entry to join.
    # NOTE(review): presumably a str is a status/error message from the
    # recognizer -- confirm against ModifiedRecognizer.
    if not isinstance(text_dict, str):
        print("".join(text_dict['text']))
    print(separator)

    # Emotion recognition over the same audio.
    emotion_dict = asr.recognize_emotion(audio)
    print(f"emotion_dict: {emotion_dict}")
    if not isinstance(emotion_dict, str):
        scores = emotion_dict['scores']
        labels = emotion_dict['labels']
        # Report the label at the position of the highest score (first one on ties).
        best_index = scores.index(max(scores))
        print("emotion: " + labels[best_index])
if __name__ == "__main__":
    # Run the demo only when this file is executed as a script, so that
    # importing the module does not start recognition as a side effect.
    asr_file_stream()