"""Thin wrapper around a ModelScope speaker-verification pipeline.

Provides helpers to extract speaker embeddings from wav files and to decide
whether two utterances (or two embeddings) belong to the same speaker.
"""
import os
import pdb

import numpy as np
from modelscope.pipelines import pipeline

# Keyword arguments for ``speaker_verfication`` selecting the ERes2NetV2 model.
ERES2NETV2 = {
    "task": 'speaker-verification',
    "model_name": 'damo/speech_eres2netv2_sv_zh-cn_16k-common',
    "model_revision": 'v1.0.1',
    "save_embeddings": False
}

# Path where embeddings are saved.
DEFALUT_SAVE_PATH = r".\takway\savePath"


def _cosine_similarity(emb_a, emb_b):
    """Return the cosine similarity between two embedding vectors."""
    return np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b))


class speaker_verfication:
    """Speaker verification built on a ModelScope pipeline.

    Args:
        task: ModelScope task name forwarded to ``pipeline``.
        model_name: Model identifier forwarded to ``pipeline`` as ``model``.
        model_revision: Model revision forwarded to ``pipeline``.
        device: Device string forwarded to ``pipeline``.
        save_embeddings: When true, ``_verifaction`` also writes the first
            speaker's embedding to disk.
    """

    def __init__(self,
                 task='speaker-verification',
                 model_name='damo/speech_eres2netv2_sv_zh-cn_16k-common',
                 model_revision='v1.0.1',
                 device="cuda",
                 save_embeddings=False):
        self.pipeline = pipeline(
            task=task,
            model=model_name,
            model_revision=model_revision,
            device=device)
        self.save_embeddings = save_embeddings

    def wav2embeddings(self, speaker_1_wav):
        """Run the pipeline on one wav input and return its embedding.

        Args:
            speaker_1_wav: Audio input passed to the pipeline as a
                single-element list.

        Returns:
            The first entry of the pipeline result's ``'embs'`` field.
        """
        result = self.pipeline([speaker_1_wav], output_emb=True)
        return result['embs'][0]

    def _verifaction(self, speaker_1_wav, speaker_2_wav, threshold, save_path):
        """Compare two wav inputs with the pipeline and return its verdict.

        When ``self.save_embeddings`` is true, the embedding of
        ``speaker_1_wav`` is additionally written to
        ``<save_path>/speaker_1.npy``.

        Args:
            speaker_1_wav: First audio input.
            speaker_2_wav: Second audio input.
            threshold: Decision threshold forwarded to the pipeline as ``thr``.
            save_path: Directory for the saved embedding; only used when
                ``self.save_embeddings`` is true.

        Returns:
            The ``"text"`` field of the pipeline's verification output.
        """
        wav_pair = [speaker_1_wav, speaker_2_wav]
        if not self.save_embeddings:
            result = self.pipeline(wav_pair, thr=threshold)
            return result["text"]

        # With ``output_emb=True`` the verdict is read from the nested
        # ``'outputs'`` entry and the embeddings from ``'embs'``.
        result = self.pipeline(wav_pair, thr=threshold, output_emb=True)
        if save_path:
            # np.save does not create missing directories.
            os.makedirs(save_path, exist_ok=True)
        np.save(os.path.join(save_path, "speaker_1.npy"), result["embs"][0])
        return result['outputs']["text"]

    def _verifaction_from_embedding(self, base_emb, speaker_2_wav, threshold):
        """Compare a stored embedding file against a wav input.

        Args:
            base_emb: Path (or file object) of a ``.npy`` embedding, loaded
                with ``np.load``.
            speaker_2_wav: Audio input whose embedding is extracted with the
                pipeline.
            threshold: Cosine-similarity value that must be strictly exceeded
                for a match.

        Returns:
            ``"yes"`` if the cosine similarity is greater than ``threshold``,
            otherwise ``"no"``.
        """
        base_emb = np.load(base_emb)
        result = self.pipeline([speaker_2_wav], output_emb=True)
        similarity = _cosine_similarity(base_emb, result["embs"][0])
        return "yes" if similarity > threshold else "no"

    def verfication(self, base_emb, speaker_emb, threshold=0.333, ):
        """Return whether two embeddings' cosine similarity exceeds ``threshold``.

        Args:
            base_emb: Reference embedding vector.
            speaker_emb: Embedding vector to compare against the reference.
            threshold: Similarity that must be strictly exceeded.

        Returns:
            The result of ``cosine_similarity > threshold``.
        """
        return _cosine_similarity(base_emb, speaker_emb) > threshold


if __name__ == '__main__':
    # ERES2NETV2 carries the same settings as the constructor defaults with
    # save_embeddings=False, so the model only needs to be loaded once.
    verifier = speaker_verfication(**ERES2NETV2)

    # Wav-vs-wav comparison: these arguments belong to ``_verifaction``
    # (``verfication`` only accepts two embeddings and a threshold).
    result = verifier._verifaction(
        speaker_1_wav=r"C:\Users\bing\Downloads\speaker1_a_cn_16k.wav",
        speaker_2_wav=r"C:\Users\bing\Downloads\speaker2_a_cn_16k.wav",
        threshold=0.333,
        save_path=r"D:\python\irving\takway_base-main\savePath"
    )
    print("---")
    print(result)

    # Stored-embedding-vs-wav comparison. NOTE: speaker_1.npy is only written
    # by ``_verifaction`` when save_embeddings=True, so with the settings
    # above the file must already exist from an earlier run.
    print(verifier._verifaction_from_embedding(
        r"D:\python\irving\takway_base-main\savePath\speaker_1.npy",
        speaker_2_wav=r"C:\Users\bing\Downloads\speaker1_b_cn_16k.wav",
        threshold=0.333,
    ))