diff --git a/utils/emotion2vec_utils.py b/utils/emotion2vec_utils.py
index 92564ea..bd15afb 100644
--- a/utils/emotion2vec_utils.py
+++ b/utils/emotion2vec_utils.py
@@ -1,37 +1,54 @@
-from modelscope.pipelines import pipeline
-from modelscope.utils.constant import Tasks
-import json
-
-model_args = {
-    "task": Tasks.emotion_recognition,
-    "model": "iic/emotion2vec_base_finetuned"  # Alternatives: iic/emotion2vec_plus_seed, iic/emotion2vec_plus_base, iic/emotion2vec_plus_large
-    # "device": no need to set; defaults to GPU when available
-}
-
-class EmotionRecognition:
-    def __init__(self) -> None:
-        self.initialize(model_args=model_args)
-
-    # Initialize the inference pipeline
-    def initialize(self, model_args=model_args):
-        self.inference_pipeline = pipeline(**model_args)
-
-    def emotion_recognition(self,
-                            audio: bytes,
-                            granularity="utterance",  # feature granularity: "utterance" -> [1*768], "frame" -> [T*768]
-                            extract_embedding=False,  # whether to save the extracted embeddings; False keeps only the final scores
-                            output_dir="./outputs"  # where embeddings are saved (only used when extract_embedding is True)
-                            ):
-        rec_result = self.inference_pipeline(audio, granularity=granularity, extract_embedding=extract_embedding, output_dir=output_dir)
-
-        # Format the result as JSON
-        json_list = []
-        for emotion, score in zip(rec_result[0]["labels"], rec_result[0]["scores"]):
-            json_list.append({"emotion": emotion.split("/")[-1], "weight": round(score, 4)})
-        recognize_result = json.dumps(json_list)
-        return recognize_result
-
-if __name__ == "__main__":
-    model = EmotionRecognition()
-    recognize_result = model.emotion_recognition("https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
+from modelscope.pipelines import pipeline
+from modelscope.utils.constant import Tasks
+import json
+
+model_args = {
+    "task": Tasks.emotion_recognition,
+    "model": "iic/emotion2vec_base_finetuned"  # Alternatives: iic/emotion2vec_plus_seed, iic/emotion2vec_plus_base, iic/emotion2vec_plus_large
+    # "device": no need to set; defaults to GPU when available
+}
+
+class EmotionRecognition:
+    """
+    Nine-class emotion recognition:
+        0: angry
+        1: disgusted
+        2: fearful
+        3: happy
+        4: neutral
+        5: other
+        6: sad
+        7: surprised
+        8: unknown
+    Returns a JSON string of the form:
+        [{"emotion": "angry", "weight": ...},
+         {"emotion": "disgusted", "weight": ...},
+         ...
+        ]
+    """
+    def __init__(self) -> None:
+        self.initialize(model_args=model_args)
+
+    # Initialize the inference pipeline
+    def initialize(self, model_args=model_args):
+        self.inference_pipeline = pipeline(**model_args)
+
+    def emotion_recognition(self,
+                            audio,  # local file path, URL, or raw audio bytes
+                            granularity="utterance",  # feature granularity: "utterance" -> [1*768], "frame" -> [T*768]
+                            extract_embedding=False,  # whether to save the extracted embeddings; False keeps only the final scores
+                            output_dir="./outputs"  # where embeddings are saved (only used when extract_embedding is True)
+                            ):
+        rec_result = self.inference_pipeline(audio, granularity=granularity, extract_embedding=extract_embedding, output_dir=output_dir)
+
+        # Format the result as JSON
+        json_list = []
+        for emotion, score in zip(rec_result[0]["labels"], rec_result[0]["scores"]):
+            json_list.append({"emotion": emotion.split("/")[-1], "weight": round(score, 4)})
+        recognize_result = json.dumps(json_list)
+        return recognize_result
+
+if __name__ == "__main__":
+    model = EmotionRecognition()
+    recognize_result = model.emotion_recognition("https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
     print(recognize_result)
\ No newline at end of file