"""Speaker verification helpers built on a ModelScope pipeline.

``SpeakerChecker`` enrolls one reference speaker (from a wav file or a saved
``.npy`` embedding) and then decides, by cosine similarity of embeddings,
whether another audio clip matches that speaker.
"""
import argparse
import os

import numpy as np

try:
    from modelscope.pipelines import pipeline
except ImportError as _import_error:
    # Keep this module importable without modelscope; the failure is
    # re-raised (chained) as soon as a SpeakerChecker is constructed.
    pipeline = None
    _PIPELINE_IMPORT_ERROR = _import_error
else:
    _PIPELINE_IMPORT_ERROR = None

# Keyword arguments describing the ERes2NetV2 model; the keys match the
# keyword parameters of ``SpeakerChecker.__init__``.
ERES2NETV2 = {
    "task": 'speaker-verification',
    "model_name": 'damo/speech_eres2netv2_sv_zh-cn_16k-common',
    "model_revision": 'v1.0.1',
    "save_embeddings": False
}

# Location for saved embeddings: "speaker_embedding" inside the directory one
# level above the directory that contains this file.
DEFAULT_SAVE_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "speaker_embedding",
)
# Backward-compatible alias for the original (misspelled) constant name.
DEFALUT_SAVE_PATH = DEFAULT_SAVE_PATH


class SpeakerChecker:
    """Check whether audio clips match one enrolled reference speaker.

    Attributes:
        pipeline: Callable built by ``modelscope.pipelines.pipeline``; it is
            called as ``pipeline([audio], output_emb=True)`` and must return a
            mapping whose ``"embs"`` entry holds one embedding per input.
        save_embeddings: Flag stored from the constructor; no method of this
            class reads it.
        speaker_1_emb: Embedding of the enrolled reference speaker.
    """

    def __init__(self,
                 speaker_wav_path,
                 task='speaker-verification',
                 model_name='damo/speech_eres2netv2_sv_zh-cn_16k-common',
                 model_revision='v1.0.1',
                 device="cuda",
                 save_embeddings=False,):
        """Build the pipeline and enroll the speaker in ``speaker_wav_path``.

        Args:
            speaker_wav_path: Audio of the reference speaker, passed to the
                pipeline as-is.
            task: ModelScope task name.
            model_name: ModelScope model identifier.
            model_revision: Model revision to load.
            device: Device string forwarded to the pipeline.
            save_embeddings: Stored on the instance only.

        Raises:
            ImportError: If ``modelscope`` could not be imported.
        """
        if pipeline is None:
            raise ImportError(
                "modelscope is required to create a SpeakerChecker"
            ) from _PIPELINE_IMPORT_ERROR
        self.pipeline = pipeline(
            task=task,
            model=model_name,
            model_revision=model_revision,
            device=device)
        self.save_embeddings = save_embeddings

        self.update_embedding_with_wav(speaker_wav_path)

    def update_embedding_with_wav(self, speaker_wav_path, save_path=None):
        """Re-enroll the reference speaker from an audio file.

        Args:
            speaker_wav_path: Audio of the new reference speaker.
            save_path: When not ``None``, the computed embedding is also
                written there with ``np.save``; when ``None`` nothing is
                written to disk.
        """
        self.speaker_1_emb = self.wav2embeddings(speaker_wav_path, save_path)

    def update_embedding_with_np(self, speaker_emb_path):
        """Re-enroll the reference speaker from an embedding saved by ``np.save``."""
        self.speaker_1_emb = np.load(speaker_emb_path)

    def _embed(self, audio):
        """Run the pipeline on a single audio input and return its embedding."""
        result = self.pipeline([audio], output_emb=True)
        return result["embs"][0]

    def wav2embeddings(self, speaker_1_wav, save_path=None):
        """Compute the embedding of ``speaker_1_wav``, optionally saving it.

        Args:
            speaker_1_wav: Audio input passed to the pipeline.
            save_path: Optional ``np.save`` target. For path-like targets the
                parent directory is created if it does not exist yet.

        Returns:
            The embedding produced by the pipeline for the input.
        """
        speaker_1_emb = self._embed(speaker_1_wav)
        if save_path is not None:
            # np.save also accepts open file objects; only paths have a
            # parent directory that may need to be created.
            if isinstance(save_path, (str, os.PathLike)):
                parent = os.path.dirname(os.fspath(save_path))
                if parent:
                    os.makedirs(parent, exist_ok=True)
            np.save(save_path, speaker_1_emb)
        return speaker_1_emb

    def similarity(self, audio) -> float:
        """Return the cosine similarity between ``audio`` and the enrolled speaker.

        Returns:
            The cosine similarity as a float, or ``nan`` when either embedding
            has zero norm (the similarity is undefined in that case).
        """
        reference = np.asarray(self.speaker_1_emb)
        candidate = np.asarray(self._embed(audio))
        denominator = np.linalg.norm(reference) * np.linalg.norm(candidate)
        if denominator == 0:
            # Avoid a divide-by-zero warning; nan compares False against any
            # threshold, so ``checker`` rejects such input.
            return float("nan")
        return float(np.dot(reference, candidate) / denominator)

    def checker(self, audio: str, threshold=0.333) -> bool:
        """Return True when ``audio`` matches the enrolled speaker.

        Args:
            audio: Audio input to compare against the enrolled speaker.
            threshold: The match is accepted only when the cosine similarity
                is strictly greater than this value.
        """
        return bool(self.similarity(audio) > threshold)


def _main(argv=None):
    """Command-line demo: enroll one wav file and test another against it."""
    parser = argparse.ArgumentParser(
        description="Check whether two audio files come from the same speaker.")
    parser.add_argument(
        "reference", nargs="?",
        default=r"C:\Users\bing\Downloads\speaker1_a_cn_16k.wav",
        help="audio file of the reference speaker")
    parser.add_argument(
        "candidate", nargs="?",
        default=r"C:\Users\bing\Downloads\speaker1_b_cn_16k.wav",
        help="audio file to compare against the reference")
    parser.add_argument("--device", default="cuda",
                        help="device passed to the pipeline")
    parser.add_argument("--threshold", type=float, default=0.333,
                        help="cosine-similarity acceptance threshold")
    args = parser.parse_args(argv)

    speaker_checker = SpeakerChecker(args.reference, device=args.device)
    is_target = speaker_checker.checker(args.candidate, threshold=args.threshold)
    print(is_target)


if __name__ == '__main__':
    _main()