# Benchmark script: measures the real-time factor (RTF) of the FunASR
# Paraformer runtime (ONNX or libtorch backend) over a list of wav files.
import argparse
import sys
import time

import librosa

from funasr.utils.types import str2bool

# Command-line interface of the benchmark.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--model_dir",
    type=str,
    required=True,
    help="model directory handed to the Paraformer runtime",
)
parser.add_argument("--backend", type=str, default="onnx", help='["onnx", "torch"]')
# The list file is opened unconditionally later in the script, so running
# without it can never succeed. Requiring it here fails fast with a usage
# message instead of a TypeError from open(None) after the model is loaded.
parser.add_argument(
    "--wav_file",
    type=str,
    required=True,
    help="list file with one '<key><TAB or space><wav path>' entry per line",
)
parser.add_argument("--quantize", type=str2bool, default=False, help="quantized model")
parser.add_argument(
    "--intra_op_num_threads", type=int, default=1, help="intra_op_num_threads for onnx"
)
parser.add_argument("--batch_size", type=int, default=1, help="batch_size for onnx")
args = parser.parse_args()

# Import only the runtime that was requested, so the default ONNX path does
# not require the libtorch runtime (and its dependencies) to be importable.
# As before, any --backend value other than "onnx" selects the libtorch one.
if args.backend == "onnx":
    from funasr.runtime.python.onnxruntime.funasr_onnx import Paraformer
else:
    from funasr.runtime.python.libtorch.funasr_torch import Paraformer

# Build the recognizer once; it is reused for warm-up and for the timed run.
model = Paraformer(
    args.model_dir,
    batch_size=args.batch_size,
    quantize=args.quantize,
    intra_op_num_threads=args.intra_op_num_threads,
)

# Read the list file. Every entry is later split on a TAB (or, failing that,
# a single space) and its second field is used as the wav path.
# The context manager closes the handle, which the script previously leaked.
with open(args.wav_file, "r") as wav_file_f:
    # Drop blank lines (e.g. a trailing newline at end of file); they have no
    # second field and would otherwise raise IndexError when parsed.
    wav_files = [line for line in wav_file_f if line.strip()]

# The warm-up step indexes wav_files[0], so an empty list cannot be processed.
if not wav_files:
    raise ValueError("no entries found in wav list file: {}".format(args.wav_file))

# warm-up
# Run the first listed file `num` times and print per-iteration latency, the
# running average and the resulting real-time factor (RTF).
total = 0.0
num = 30
wav_path = (
    wav_files[0].split("\t")[1].strip()
    if "\t" in wav_files[0]
    else wav_files[0].split(" ")[1].strip()
)

# RTF is processing time divided by audio length. Measure the real length of
# the warm-up file instead of assuming a fixed 5.53 s clip, which gave a wrong
# RTF for any other input. Loaded at 16 kHz, so samples / 16000 is seconds.
waveform, _ = librosa.load(wav_path, sr=16000)
wav_seconds = len(waveform) / 16000.0

for i in range(num):
    # perf_counter() is monotonic and high resolution; time.time() can jump
    # when the system clock is adjusted.
    beg_time = time.perf_counter()
    result = model(wav_path)
    end_time = time.perf_counter()
    duration = end_time - beg_time
    total += duration
    avg = total / (i + 1)
    # Guard against an empty audio file so the report does not crash.
    rtf = avg / wav_seconds if wav_seconds > 0 else float("nan")
    print(result)
    # NOTE(review): "num" prints len(wav_path), i.e. the character count of
    # the path string; kept as-is because the intended value is unclear.
    print(
        "num: {}, time, {}, avg: {}, rtf: {}".format(
            len(wav_path), duration, avg, rtf
        )
    )

# infer time
# Collect the wav path (second TAB- or space-separated field) of every entry
# before starting the clock, so list parsing is not counted as compute time.
wav_path = [
    (entry.split("\t")[1] if "\t" in entry else entry.split(" ")[1]).strip()
    for entry in wav_files
]

# Time one call over the whole list of paths. perf_counter() is monotonic and
# high resolution, which makes it the right clock for measuring an interval.
beg_time = time.perf_counter()
result = model(wav_path)
end_time = time.perf_counter()

# Elapsed compute time in milliseconds; reused for the final RTF figure.
duration = (end_time - beg_time) * 1000
print("total_time_comput_ms: {}".format(int(duration)))

# Total length of all listed audio, in milliseconds.
audio_ms = []
for entry in wav_files:
    # The wav path is the second field, split on TAB when present, else space.
    fields = entry.split("\t") if "\t" in entry else entry.split(" ")
    wav_path = fields[1].strip()
    waveform, _ = librosa.load(wav_path, sr=16000)
    # Loaded at 16 kHz, i.e. 16 samples per millisecond.
    audio_ms.append(len(waveform) / 16.0)
duration_time = sum(audio_ms, 0.0)
print("total_time_wav_ms: {}".format(int(duration_time)))

# Overall real-time factor: compute time divided by audio time (both in ms).
overall_rtf = duration / duration_time
print("total_rtf: {:.5}".format(overall_rtf))