199 lines
8.5 KiB
Python
199 lines
8.5 KiB
Python
import argparse
import os
import platform
import sys

import pvporcupine
import pyaudio

from takway.clients.web_socket_client_utils import WebSocketClinet
# Default WebSocket endpoint of the chat server (overridable with --server_url).
SERVER_URL = 'ws://222.195.90.129:8001/api/chat/streaming/temporary'

# Session ids that were previously toggled by (un)commenting assignments.
# Pass one with --session_id or through the TAKWAY_SESSION_ID environment
# variable instead of editing this file.
#   'de9dc06c-2d74-42f8-9c11-9797f9fe0d01'  # Luxun
#   '3f7c2d8f-dc8a-4f1b-9fe0-6bf56c759a41'  # Yanwen
#   '1fd56ce7-f21e-4eb4-8dc8-3a17960ae328'  # Yue-ge
#   'bfcc4bbc-ac94-4604-8fd7-50545b8c6a5f'  # Lou-zong
#   '469f4a99-12a5-45a6-bc91-353df07423b6'  # Hongzhi
#   '9ad8f855-f87c-41c8-a1f3-12e39d013c8c'  # 1 (building-block toy, "Gangmu")
#   '13e14fd9-a212-4cf1-a09b-5890344c769c'  # 2
#   'e6e76e50-cb4c-4fe7-8660-8278e712c49b'  # 3
#   '6cbbfa55-58e3-437a-94c4-65eee1fa88a6'  # 4
#   'c14cfd9f-3609-46f9-90aa-ba5a80b65015'  # 5
#
# NOTE(review): the Picovoice access keys that used to live here as
# commented-out literals were removed; credentials should not be kept in
# source. Supply the key with --access_key or PICOVOICE_ACCESS_KEY.


def str2bool(value):
    """Convert a command-line token to a bool.

    ``argparse`` with ``type=bool`` treats every non-empty string (including
    "False") as True, so boolean flags need an explicit converter.

    Args:
        value: A bool, or a string such as "true"/"false", "1"/"0",
            "yes"/"no", "on"/"off" (case-insensitive).

    Returns:
        The corresponding bool.

    Raises:
        argparse.ArgumentTypeError: If the token is not a recognised boolean.
    """
    if isinstance(value, bool):
        return value
    token = str(value).strip().lower()
    if token in ('true', 't', 'yes', 'y', '1', 'on'):
        return True
    if token in ('false', 'f', 'no', 'n', '0', 'off'):
        return False
    raise argparse.ArgumentTypeError('expected a boolean value, got %r' % (value,))


def detect_board(system):
    """Return the board identifier for the given ``platform.system()`` name.

    Linux hosts are treated as the 'orangepi' board ('v329' is the other
    identifier the script knows about). Every other system, including ones
    the original script left ``board`` undefined for, maps to None.
    """
    if system == 'Linux':
        # Change to 'v329' when running on that board.
        return 'orangepi'
    return None


def board_defaults(board):
    """Return the board-specific default values for the CLI options.

    Args:
        board: 'v329', 'orangepi', or anything else for a generic host.

    Returns:
        A dict with the keys ``keywords``, ``keyword_paths``, ``model_path``,
        ``hd_trigger``, ``mircophone_device`` and ``speaker_device``.
    """
    # Generic host: built-in wake words, keyboard trigger, default devices.
    defaults = {
        'keywords': ['hey google', 'ok google'],
        'keyword_paths': None,
        'model_path': None,
        'hd_trigger': 'keyboard',
        'mircophone_device': None,
        'speaker_device': None,
    }
    if board == 'v329':
        defaults.update(
            keywords=['可莉可莉'],
            keyword_paths=[r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"],
            model_path=r"picovoice_models/porcupine_params_zh.pv",
            hd_trigger='button',
        )
    elif board == 'orangepi':
        # Two more wake-word models were previously listed but disabled:
        # models/芭比_zh_raspberry-pi_v3_0_0/芭比_zh_raspberry-pi_v3_0_0.ppn
        # models/星云_zh_raspberry-pi_v3_0_0/星云_zh_raspberry-pi_v3_0_0.ppn
        defaults.update(
            keywords=['哔卡', '刚木'],
            keyword_paths=[
                'models/哔卡_zh_raspberry-pi_v3_0_0/哔卡_zh_raspberry-pi_v3_0_0.ppn',
                'models/刚木_zh_raspberry-pi_v3_0_0/刚木_zh_raspberry-pi_v3_0_0.ppn',
            ],
            model_path="models/porcupine_params_zh.pv",
            hd_trigger='button',
            mircophone_device=2,
            speaker_device=2,
        )
    return defaults


def build_parser(defaults, keyword_choices):
    """Build the command-line parser.

    Args:
        defaults: Mapping as returned by ``board_defaults``.
        keyword_choices: Sorted list of keyword names accepted by
            ``--keywords`` (the script passes ``sorted(pvporcupine.KEYWORDS)``).

    Returns:
        The configured ``argparse.ArgumentParser``.
    """
    env_session_id = os.environ.get('TAKWAY_SESSION_ID')

    parser = argparse.ArgumentParser()

    # server params
    parser.add_argument('--server_url', type=str, default=SERVER_URL,
                        help='WebSocket URL of the chat server')
    parser.add_argument('--session_id', type=str, default=env_session_id,
                        required=not env_session_id,
                        help='Session id sent to the server. Default: the TAKWAY_SESSION_ID '
                             'environment variable')

    # recorder parameters
    parser.add_argument('--voice_trigger', type=str2bool, default=True, help='Voice trigger')
    parser.add_argument('--access_key', default=os.environ.get('PICOVOICE_ACCESS_KEY'),
                        help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
    # nargs='+' already yields a list of strings; the former type=list split
    # every token into characters so no value could ever match `choices`.
    # The board defaults are not validated against `choices` by argparse.
    parser.add_argument('--keywords', nargs='+', choices=keyword_choices,
                        default=defaults['keywords'],
                        help='List of default keywords for detection. Available keywords: %s'
                             % ', '.join(keyword_choices),
                        metavar='')
    parser.add_argument('--keyword_paths', nargs='+',
                        default=defaults['keyword_paths'],
                        help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
    parser.add_argument('--library_path', default=None,
                        help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
    parser.add_argument('--model_path',
                        default=defaults['model_path'],
                        help='Absolute path to the file containing model parameters. '
                             'Default: using the library provided by `pvporcupine`')
    parser.add_argument('--sensitivities', type=float,
                        default=0.9,
                        help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
                             "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.9 "
                             "will be used.")
    parser.add_argument('--hd_trigger', type=str,
                        default=defaults['hd_trigger'],
                        help='Hardware trigger')
    parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')

    # recorder parameters
    parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size')  # original value
    parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate')
    parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format')
    parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels')
    parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name')
    parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
    parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
    parser.add_argument('--max_slience_time', type=int, default=10, help='Max slient time when recording, sec')
    # How many seconds of silence to wait for before the audio is sent.
    parser.add_argument('--min_act_time', type=float, default=0.3, help='Min inactive time, sec')
    parser.add_argument('--mircophone_device', type=int, default=defaults['mircophone_device'],
                        help='Microphone device index')

    # player parameters
    parser.add_argument('--speaker_device', type=int, default=defaults['speaker_device'],
                        help='Speaker device index')
    parser.add_argument('--OUT_CHUNK', type=int, default=512, help='Player chunk size')
    parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate')
    parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format')
    parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels')
    parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name')
    parser.add_argument('--OUT_frames_per_buffer', type=int, default=11025, help='Player frames per buffer')

    # log parameters
    parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file')
    parser.add_argument('--log_level', type=str, default='INFO', help='Log level')

    parser.add_argument('--debug', type=str2bool, default=False, help='Debug mode')
    return parser


def main(argv=None):
    """Parse the command line, assemble the argument groups and start the client.

    Args:
        argv: Optional list of command-line tokens; None means ``sys.argv[1:]``.
    """
    system = platform.system()
    if system == 'Windows':
        print("WebSocketClinet runs on Windows system.")
    board = detect_board(system)

    if board == 'v329':
        # Kept from the original script, which imported gpiod on this board
        # without using it here; presumably an early availability check of
        # the GPIO bindings -- TODO confirm.
        import gpiod as gpio  # noqa: F401

    parser = build_parser(board_defaults(board), sorted(pvporcupine.KEYWORDS))
    args = parser.parse_args(argv)

    if args.voice_trigger and not args.access_key:
        print('warning: voice trigger is enabled but no Picovoice access key was given '
              '(use --access_key or PICOVOICE_ACCESS_KEY)', file=sys.stderr)

    # Alternative used during development: excute_args = {'enable': True}
    excute_args = {}

    # sort out args and params
    server_args = {
        'server_url': args.server_url,
        'session_id': args.session_id,
    }

    recorder_args = {
        'board': board,
        'access_key': args.access_key,
        'keywords': args.keywords,
        'keyword_paths': args.keyword_paths,
        'library_path': args.library_path,
        'model_path': args.model_path,
        'sensitivities': args.sensitivities,
        'hd_trigger': args.hd_trigger,
        'keyboard_key': args.keyboard_key,
        'voice_trigger': args.voice_trigger,
        'CHUNK': args.IN_CHUNK,
        # Any value other than 16 selects 32-bit samples.
        'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.IN_CHANNELS,
        'RATE': args.IN_RATE,
        'min_act_time': args.min_act_time,
        'max_slience_time': args.max_slience_time,
        'min_stream_record_time': args.min_stream_record_time,
        'input_device_index': args.mircophone_device,
        'frames_per_buffer': args.IN_frames_per_buffer,
    }

    player_args = {
        'output_device_index': args.speaker_device,
        'CHUNK': args.OUT_CHUNK,
        'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.OUT_CHANNELS,
        'RATE': args.OUT_RATE,
        'filename': args.OUT_filename,
        'frames_per_buffer': args.OUT_frames_per_buffer,
    }

    log_args = {
        'log_file': args.log_file,
        'log_level': args.log_level,
    }

    localclient = WebSocketClinet(
        board=board,
        server_args=server_args,
        recorder_args=recorder_args,
        player_args=player_args,
        excute_args=excute_args,
        log_args=log_args)
    localclient.process_init()


if __name__ == '__main__':
    main()