speaker-checker
This commit is contained in:
parent
f1076b86bb
commit
2bed7ad488
|
@ -0,0 +1,114 @@
|
|||
from modelscope.pipelines import pipeline
|
||||
import numpy as np
|
||||
import os
|
||||
|
||||
# Preset for the ERes2NetV2 (zh-cn, 16 kHz) speaker-verification model.
# The keys mirror the keyword parameters of SpeakerChecker.__init__, so the
# dict can be unpacked into it alongside the required ``speaker_wav_path``.
ERES2NETV2 = {
    "task": 'speaker-verification',
    "model_name": 'damo/speech_eres2netv2_sv_zh-cn_16k-common',
    "model_revision": 'v1.0.1',
    "save_embeddings": False,
}
|
||||
|
||||
# Directory in which speaker embeddings are saved.
# Resolved from this file's location: ``__name__`` is the module name, not a
# path, so taking dirname() of it always produced "" and the constant
# collapsed to a path relative to the current working directory.
DEFALUT_SAVE_PATH = os.path.join(
    os.path.dirname(os.path.dirname(os.path.abspath(__file__))),
    "speaker_embedding")
# Correctly spelled alias; the misspelled name is kept for existing importers.
DEFAULT_SAVE_PATH = DEFALUT_SAVE_PATH
|
||||
|
||||
class SpeakerChecker:
    """Decide whether an audio clip matches an enrolled reference speaker.

    The reference speaker's embedding is kept in ``self.speaker_1_emb``.
    ``checker`` embeds a candidate clip with the same pipeline and compares
    the two embeddings by cosine similarity.
    """

    def __init__(self,
                 speaker_wav_path,
                 task='speaker-verification',
                 model_name='damo/speech_eres2netv2_sv_zh-cn_16k-common',
                 model_revision='v1.0.1',
                 device="cuda",
                 save_embeddings=False,):
        """Build the pipeline and enroll the reference speaker.

        Args:
            speaker_wav_path: Audio of the reference speaker; embedded
                immediately and stored as ``self.speaker_1_emb``.
            task: Task name forwarded to ``pipeline``.
            model_name: Model identifier forwarded to ``pipeline`` as ``model``.
            model_revision: Model revision forwarded to ``pipeline``.
            device: Device forwarded to ``pipeline``.
            save_embeddings: Stored on the instance; no method of this class
                currently reads it.
        """
        self.pipeline = pipeline(
            task=task,
            model=model_name,
            model_revision=model_revision,
            device=device)
        self.save_embeddings = save_embeddings

        self.update_embedding_with_wav(speaker_wav_path)

    def update_embedding_with_wav(self, speaker_wav_path, save_path=None):
        """Replace the reference embedding with one computed from a wav file.

        When ``save_path`` is None the embedding is kept in memory only;
        otherwise it is also written to ``save_path`` on disk.
        """
        self.speaker_1_emb = self.wav2embeddings(speaker_wav_path, save_path)

    def update_embedding_with_np(self, speaker_emb_path):
        """Replace the reference embedding with one loaded via ``np.load``."""
        self.speaker_1_emb = np.load(speaker_emb_path)

    def wav2embeddings(self, speaker_1_wav, save_path=None):
        """Run the pipeline on one audio input and return its embedding.

        Args:
            speaker_1_wav: Audio input handed to the pipeline.
            save_path: If not None, the embedding is also stored there with
                ``np.save``.

        Returns:
            The first entry of the pipeline result's ``'embs'`` field.
        """
        result = self.pipeline([speaker_1_wav], output_emb=True)
        speaker_1_emb = result['embs'][0]
        if save_path is not None:
            np.save(save_path, speaker_1_emb)
        return speaker_1_emb

    def checker(self, audio: str, threshold=0.333):
        """Return True if ``audio`` matches the reference speaker.

        Args:
            audio: Audio input to compare against the reference embedding.
            threshold: The cosine similarity must be strictly greater than
                this value for a match.

        Returns:
            bool: True on a match, False otherwise. False is also returned
            when either embedding has zero norm, since the similarity is
            undefined in that case.
        """
        # Reuse the shared embedding path instead of repeating the pipeline call.
        speaker2_emb = self.wav2embeddings(audio)

        norm_product = (np.linalg.norm(self.speaker_1_emb)
                        * np.linalg.norm(speaker2_emb))
        if norm_product == 0:
            # Dividing would give NaN, which never exceeds the threshold;
            # return the same answer without the divide-by-zero warning.
            return False

        similarity = np.dot(self.speaker_1_emb, speaker2_emb) / norm_product
        return bool(similarity > threshold)
|
||||
|
||||
|
||||
# def _verifaction(self, speaker_1_wav, speaker_2_wav, threshold, save_path):
|
||||
# if not self.save_embeddings:
|
||||
# result = self.pipeline([speaker_1_wav, speaker_2_wav], thr=threshold)
|
||||
# return result["text"]
|
||||
# else:
|
||||
# result = self.pipeline([speaker_1_wav, speaker_2_wav], thr=threshold, output_emb=True)
|
||||
# speaker1_emb = result["embs"][0]
|
||||
# speaker2_emb = result["embs"][1]
|
||||
# np.save(os.path.join(save_path, "speaker_1.npy"), speaker1_emb)
|
||||
# return result['outputs']["text"]
|
||||
|
||||
# def _verifaction_from_embedding(self, base_emb, speaker_2_wav, threshold):
|
||||
# base_emb = np.load(base_emb)
|
||||
# result = self.pipeline([speaker_2_wav], output_emb=True)
|
||||
# speaker2_emb = result["embs"][0]
|
||||
# similarity = np.dot(base_emb, speaker2_emb) / (np.linalg.norm(base_emb) * np.linalg.norm(speaker2_emb))
|
||||
# if similarity > threshold:
|
||||
# return "yes"
|
||||
# else:
|
||||
# return "no"
|
||||
|
||||
# def verfication(self,
|
||||
# base_emb=None,
|
||||
# speaker_1_wav=None,
|
||||
# speaker_2_wav=None,
|
||||
# threshold=0.333,
|
||||
# save_path=None):
|
||||
# if base_emb is not None and speaker_1_wav is not None:
|
||||
# raise ValueError("Only need one of them, base_emb or speaker_1_wav")
|
||||
# if base_emb is not None and speaker_2_wav is not None:
|
||||
# return self._verifaction_from_embedding(base_emb, speaker_2_wav, threshold)
|
||||
# elif speaker_1_wav is not None and speaker_2_wav is not None:
|
||||
# return self._verifaction(speaker_1_wav, speaker_2_wav, threshold, save_path)
|
||||
# else:
|
||||
# raise NotImplementedError
|
||||
|
||||
if __name__ == '__main__':
    # Manual smoke test: enroll one recording as the reference speaker, then
    # check whether a second recording is judged to be the same speaker.
    speaker_wav_path = r"C:\Users\bing\Downloads\speaker1_a_cn_16k.wav"
    speaker_checker = SpeakerChecker(speaker_wav_path)
    audio = r"C:\Users\bing\Downloads\speaker1_b_cn_16k.wav"
    is_target = speaker_checker.checker(audio)
    print(is_target)
|
||||
|
Loading…
Reference in New Issue