WIP: bing-patch-1 #2
|
@ -1,37 +1,54 @@
|
||||||
from modelscope.pipelines import pipeline
|
from modelscope.pipelines import pipeline
|
||||||
from modelscope.utils.constant import Tasks
|
from modelscope.utils.constant import Tasks
|
||||||
import json
|
import json
|
||||||
|
|
||||||
# Configuration handed to the ModelScope pipeline factory.
# Alternative checkpoints: iic/emotion2vec_plus_seed, iic/emotion2vec_plus_base,
# iic/emotion2vec_plus_large, iic/emotion2vec_base_finetuned.
model_args = {
    "task": Tasks.emotion_recognition,
    "model": "iic/emotion2vec_base_finetuned",
    # "device" is deliberately not set; the pipeline defaults to GPU.
}
|
||||||
|
|
||||||
class EmotionRecognition:
    """Speech emotion recognition via the emotion2vec ModelScope pipeline.

    The fine-tuned model predicts 9 emotion classes:
        0: angry      1: disgusted  2: fearful   3: happy   4: neutral
        5: other      6: sad        7: surprised 8: unknown
    """

    def __init__(self) -> None:
        # Build the inference pipeline eagerly so the first recognition
        # call does not pay the model-loading cost.
        self.initialize(model_args=model_args)

    # Initialize the model.
    def initialize(self, model_args=model_args):
        """Create the ModelScope inference pipeline from *model_args*."""
        self.inference_pipeline = pipeline(**model_args)

    def emotion_recognition(self,
                            audio,                    # bytes, local file path, or URL (the __main__ demo passes a URL string, so the former `bytes` annotation was wrong)
                            granularity="utterance",  # embedding granularity: "utterance" -> [*768], "frame" -> [T*768]
                            extract_embedding=False,  # keep intermediate features? False keeps only the final scores
                            output_dir="./outputs",   # where embeddings are written (only used when extract_embedding=True)
                            ):
        """Run emotion recognition on *audio*.

        Returns:
            A JSON string encoding a list like
            ``[{"emotion": "angry", "weight": 0.1234}, ...]`` with one entry
            per class, scores rounded to 4 decimal places.
        """
        rec_result = self.inference_pipeline(
            audio,
            granularity=granularity,
            extract_embedding=extract_embedding,
            output_dir=output_dir,
        )
        # Labels come back as "<zh>/<en>" pairs; keep only the part after
        # the last slash (the English name).
        json_list = [
            {"emotion": label.split("/")[-1], "weight": round(score, 4)}
            for label, score in zip(rec_result[0]["labels"],
                                    rec_result[0]["scores"])
        ]
        # ensure_ascii=False keeps any non-ASCII label text readable
        # instead of \uXXXX-escaped.
        return json.dumps(json_list, ensure_ascii=False)


if __name__ == "__main__":
    model = EmotionRecognition()
    recognize_result = model.emotion_recognition("https://isv-data.oss-cn-hangzhou.aliyuncs.com/ics/MaaS/ASR/test_audio/asr_example_zh.wav")
    print(recognize_result)
|
Loading…
Reference in New Issue