# Repository page residue (not part of the module): 1 / 0 / Fork 0
# TakwayPlatform/utils/stt/speaker_ver_utils.py
# 86 lines, 3.6 KiB, Python

from modelscope.pipelines import pipeline
import numpy as np
import os
# Preset for the ERes2NetV2 speaker-verification model. The keys match the
# keyword arguments of ``speaker_verfication.__init__`` so the dict can be
# unpacked straight into the constructor.
ERES2NETV2 = dict(
    task='speaker-verification',
    model_name='damo/speech_eres2netv2_sv_zh-cn_16k-common',
    model_revision='v1.0.1',
    save_embeddings=False,
)
# Path under which speaker embeddings are saved.
# NOTE(review): ``__name__`` is the module's dotted name, not a file path, so
# both dirname() calls normally yield "" and this resolves to the relative
# path "speaker_embedding". ``__file__`` was probably intended -- confirm
# before changing, since that would move where embeddings are stored.
DEFALUT_SAVE_PATH = os.path.join(
    os.path.dirname(os.path.dirname(__name__)),
    "speaker_embedding",
)
class speaker_verfication:
    """Speaker verification on top of a ModelScope ``pipeline``.

    Two audio clips can be compared directly, or one clip can be compared
    against a reference embedding (a ``.npy`` file or an in-memory array).

    NOTE(review): the class and method names contain typos (``verfication``,
    ``_verifaction``); they are kept so existing callers keep working.
    """

    def __init__(self,
                 task='speaker-verification',
                 model_name='damo/speech_eres2netv2_sv_zh-cn_16k-common',
                 model_revision='v1.0.1',
                 device="cuda",
                 save_embeddings=False):
        """Build the underlying pipeline.

        Args:
            task: Task name forwarded to ``pipeline``.
            model_name: Model identifier forwarded to ``pipeline`` as ``model``.
            model_revision: Model revision forwarded to ``pipeline``.
            device: Device string forwarded to ``pipeline``.
            save_embeddings: When true, wav-vs-wav verification also writes
                the first speaker's embedding into the ``save_path``
                directory passed to ``verfication``.
        """
        self.pipeline = pipeline(
            task=task,
            model=model_name,
            model_revision=model_revision,
            device=device)
        self.save_embeddings = save_embeddings

    def wav2embeddings(self, speaker_1_wav, save_path=None):
        """Extract the embedding of a single audio clip.

        Args:
            speaker_1_wav: Audio input handed to the pipeline, e.g. the path
                of a wav file.
            save_path: Optional file path; when given, the embedding is
                written there with ``np.save``.

        Returns:
            The first entry of the pipeline's ``'embs'`` output.
        """
        result = self.pipeline([speaker_1_wav], output_emb=True)
        speaker_1_emb = result['embs'][0]
        if save_path is not None:
            np.save(save_path, speaker_1_emb)
        return speaker_1_emb

    def _verifaction(self, speaker_1_wav, speaker_2_wav, threshold, save_path):
        """Compare two audio clips with the pipeline.

        Args:
            speaker_1_wav: First audio input.
            speaker_2_wav: Second audio input.
            threshold: Decision threshold forwarded to the pipeline as ``thr``.
            save_path: Directory that receives ``speaker_1.npy`` when
                ``self.save_embeddings`` is true. ``None`` skips saving,
                mirroring ``wav2embeddings``.

        Returns:
            The pipeline's ``"text"`` verdict.
        """
        wants_saving = self.save_embeddings and save_path is not None
        if not wants_saving:
            result = self.pipeline([speaker_1_wav, speaker_2_wav], thr=threshold)
            return result["text"]

        # With output_emb=True the verdict is nested under 'outputs', next to
        # the embeddings under 'embs'.
        result = self.pipeline([speaker_1_wav, speaker_2_wav], thr=threshold, output_emb=True)
        directory = os.fspath(save_path)
        if directory:
            # Create the target directory so np.save does not fail on first use.
            os.makedirs(directory, exist_ok=True)
        np.save(os.path.join(save_path, "speaker_1.npy"), result["embs"][0])
        return result['outputs']["text"]

    def _verifaction_from_embedding(self, base_emb, speaker_2_wav, threshold):
        """Compare an audio clip against a reference embedding.

        Args:
            base_emb: Reference embedding, either a path readable by
                ``np.load`` or an already loaded ``np.ndarray`` (for example
                the value returned by ``wav2embeddings``).
            speaker_2_wav: Audio input to verify.
            threshold: Cosine similarity must be strictly greater than this
                value for a match.

        Returns:
            ``"yes"`` when the cosine similarity exceeds ``threshold``,
            otherwise ``"no"``.
        """
        if isinstance(base_emb, np.ndarray):
            reference = base_emb
        else:
            reference = np.load(base_emb)
        reference = np.asarray(reference).ravel()

        result = self.pipeline([speaker_2_wav], output_emb=True)
        candidate = np.asarray(result["embs"][0]).ravel()

        norm_product = np.linalg.norm(reference) * np.linalg.norm(candidate)
        if norm_product == 0:
            # A zero vector has no direction; treat it as a non-match instead
            # of dividing by zero and comparing against NaN.
            return "no"
        similarity = np.dot(reference, candidate) / norm_product
        return "yes" if similarity > threshold else "no"

    def verfication(self,
                    base_emb=None,
                    speaker_1_wav=None,
                    speaker_2_wav=None,
                    threshold=0.333,
                    save_path=None):
        """Verify whether ``speaker_2_wav`` matches a reference speaker.

        The reference is given either as ``base_emb`` or as
        ``speaker_1_wav``, never both.

        Args:
            base_emb: Reference embedding (``.npy`` path or ``np.ndarray``).
            speaker_1_wav: Reference audio input.
            speaker_2_wav: Audio input to verify; always required.
            threshold: Decision threshold.
            save_path: Directory for the saved embedding; only used for
                wav-vs-wav verification when ``save_embeddings`` is true.

        Returns:
            The verdict string (``"yes"`` / ``"no"`` on the embedding path,
            the pipeline's ``"text"`` output on the wav-vs-wav path).

        Raises:
            ValueError: Both ``base_emb`` and ``speaker_1_wav`` were given.
            NotImplementedError: ``speaker_2_wav`` or the reference is missing.
        """
        if base_emb is not None and speaker_1_wav is not None:
            raise ValueError("Only need one of them, base_emb or speaker_1_wav")
        if speaker_2_wav is None or (base_emb is None and speaker_1_wav is None):
            raise NotImplementedError(
                "speaker_2_wav plus one of base_emb or speaker_1_wav is required")
        if base_emb is not None:
            return self._verifaction_from_embedding(base_emb, speaker_2_wav, threshold)
        return self._verifaction(speaker_1_wav, speaker_2_wav, threshold, save_path)
if __name__ == '__main__':
    # Manual demo: compare two wav files, then compare a wav file against a
    # stored embedding. All paths are machine-specific and must be adapted.
    speaker_1_a = r"C:\Users\bing\Downloads\speaker1_a_cn_16k.wav"
    speaker_2_a = r"C:\Users\bing\Downloads\speaker2_a_cn_16k.wav"
    speaker_1_b = r"C:\Users\bing\Downloads\speaker1_b_cn_16k.wav"
    save_dir = r"D:\python\irving\takway_base-main\savePath"
    stored_embedding = r"D:\python\irving\takway_base-main\savePath\speaker_1.npy"

    # Build the verifier once: ERES2NETV2 carries the same values as the
    # constructor defaults, so a second construction would only reload the
    # model.
    verifier = speaker_verfication(**ERES2NETV2)

    result = verifier.verfication(base_emb=None,
                                  speaker_1_wav=speaker_1_a,
                                  speaker_2_wav=speaker_2_a,
                                  threshold=0.333,
                                  save_path=save_dir)
    print("---")
    print(result)

    # save_embeddings is False here, so this run does not write
    # speaker_1.npy; the file must already exist from an earlier run.
    print(verifier.verfication(stored_embedding,
                               speaker_2_wav=speaker_1_b,
                               threshold=0.333))