From 346559fb900eefaed732b17711170b95925e1dd1 Mon Sep 17 00:00:00 2001
From: IrvingGao <1729854488@qq.com>
Date: Thu, 23 May 2024 16:07:23 +0800
Subject: [PATCH] New interaction logic: long press / short press
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 takway/audio_utils.py                     |   2 +-
 takway/board/keyboard.py                  |  10 +-
 takway/board/orangepi.py                  |  68 +++--
 takway/clients/client_utils.py            |   6 +
 takway/clients/web_socket_client_utils.py | 303 ++++++++++++++++------
 tools/keyboard_io.py                      |   6 +
 ws_client.py                              |   4 +-
 7 files changed, 281 insertions(+), 118 deletions(-)
 create mode 100644 tools/keyboard_io.py

diff --git a/takway/audio_utils.py b/takway/audio_utils.py
index e15f832..be2f8fd 100644
--- a/takway/audio_utils.py
+++ b/takway/audio_utils.py
@@ -396,7 +396,7 @@ except:
     webrtcvad_available = False
 
 class VADRecorder(HDRecorder):
-    def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1,**kwargs):
+    def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=0.3,**kwargs):
         super().__init__(**kwargs)
         if webrtcvad_available:
             self.vad = webrtcvad.Vad(vad_sensitivity)
diff --git a/takway/board/keyboard.py b/takway/board/keyboard.py
index 2d4e24d..d0eee7b 100644
--- a/takway/board/keyboard.py
+++ b/takway/board/keyboard.py
@@ -12,7 +12,7 @@ class Keyboard(BaseHardware):
         self.keyboard_key = keyboard_key
         self.init_hd_thread()
 
-        self.power_status = False # single-trigger button state
+        self.long_power_status = False # single-trigger button state
 
     def hd_detection_loop(self):
         keyboard_status = False
@@ -27,10 +27,12 @@ class Keyboard(BaseHardware):
             time.sleep(0.001)
 
             if not self.shared_hd_status and last_status:
-                self.power_status = ~self.power_status
-                if self.power_status:
+                self.long_power_status = ~self.long_power_status
+                if self.long_power_status:
                     print("Chatting mode.")
                 else:
                     print("Silence mode.")
                 print(f"press time: {datetime.datetime.now()}")
-            last_status = self.shared_hd_status
\ No newline at end of file
+            last_status = self.shared_hd_status
+
+
\ No newline at end of file
diff --git a/takway/board/orangepi.py b/takway/board/orangepi.py
index ebe54aa..49d8c6a 100644
--- a/takway/board/orangepi.py
+++ b/takway/board/orangepi.py
@@ -7,7 +7,6 @@ try:
 except:
     pass
 
-
 '''
 | GPIO | LED |
 | -- | - - |
@@ -17,7 +16,7 @@
 | 3 | blue |
 | 4 | white |
 
-| GPIO | BUTTOM |
+| GPIO | BUTTON |
 | -- | ---- |
 | 6 | button 1 |
 | 8 | button 2 |
@@ -40,47 +39,62 @@ class OrangePi(BaseHardware):
         self.led_set_status_2 = False
 
         self.power_status = False # single-trigger button state
-
+        self.long_power_status = False
+        self.short_power_status = False
+
         self.button_init()
         self.init_hd_thread()
 
     def button_init(self):
         wiringpi.wiringPiSetup()
         # GPIO output mode
-        wiringpi.pinMode(self.LED_PIN_red,GPIO.OUTPUT)
-        wiringpi.pinMode(self.LED_PIN_yellow,GPIO.OUTPUT)
-        wiringpi.pinMode(self.LED_PIN_green,GPIO.OUTPUT)
-        wiringpi.pinMode(self.LED_PIN_blue,GPIO.OUTPUT)
-        wiringpi.pinMode(self.LED_PIN_white,GPIO.OUTPUT)
+        wiringpi.pinMode(self.LED_PIN_red, GPIO.OUTPUT)
+        wiringpi.pinMode(self.LED_PIN_yellow, GPIO.OUTPUT)
+        wiringpi.pinMode(self.LED_PIN_green, GPIO.OUTPUT)
+        wiringpi.pinMode(self.LED_PIN_blue, GPIO.OUTPUT)
+        wiringpi.pinMode(self.LED_PIN_white, GPIO.OUTPUT)
         # GPIO input mode
-        wiringpi.pinMode(self.BUTTON_PIN_1,GPIO.INPUT)
-        wiringpi.pinMode(self.BUTTON_PIN_2,GPIO.INPUT)
+        wiringpi.pinMode(self.BUTTON_PIN_1, GPIO.INPUT)
+        wiringpi.pinMode(self.BUTTON_PIN_2, GPIO.INPUT)
 
     def init_hd_thread(self):
-        # hd_threads = [threading.Thread(target=self.hd_detection_loop),
-        #               threading.Thread(target=self.hd_detection_loop_2)]
+        hd_threads = [threading.Thread(target=self.hd_detection_loop)]
         for hd_thread in hd_threads:
             hd_thread.start()
 
     def hd_detection_loop(self):
-        keyboard_status = False
-        last_status = False
+        last_status_1 = False
+        press_time_1 = None
+
         while True:
-            self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1)
-            if self.button_status:
-                wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW)
-            else:
-                wiringpi.digitalWrite(self.LED_PIN_red,GPIO.HIGH)
+            button_status_1 = wiringpi.digitalRead(self.BUTTON_PIN_1)
+            if not button_status_1 and last_status_1:
+                press_time_1 = datetime.datetime.now()
+            elif button_status_1 and not last_status_1:
+                if press_time_1:
+                    press_duration = (datetime.datetime.now() - press_time_1).total_seconds()
+                    print(f"press_duration: {press_duration}")
+                    if press_duration > 1:
+                        self.long_power_status = ~self.long_power_status
+                        print(f"long_power_status: {self.long_power_status}")
+                    else:
+                        self.short_power_status = ~self.short_power_status
+                        print(f"short_power_status: {self.short_power_status}")
+                    press_time_1 = None
+
+            last_status_1 = button_status_1
 
-            if not self.button_status and last_status:
-                self.power_status = ~self.power_status
-                if self.power_status:
-                    print("Chatting mode.")
-                else:
-                    print("Silence mode.")
-                print(f"press time: {datetime.datetime.now()}")
-            last_status = self.button_status
+            # update LED status
+            if self.long_power_status:
+                self.set_led_on('white')
+            else:
+                self.short_power_status = False
+                self.set_led_off('white')
+
+            if self.short_power_status:
+                self.set_led_on('yellow')
+            else:
+                self.set_led_off('yellow')
 
     def set_led_on(self, color='red'):
         wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH)
diff --git a/takway/clients/client_utils.py b/takway/clients/client_utils.py
index d795925..4ba1893 100644
--- a/takway/clients/client_utils.py
+++ b/takway/clients/client_utils.py
@@ -117,6 +117,12 @@ class BaseWebSocketClient:
         '''
         self.websocket = create_connection(self.server_url)
 
+    def close_client(self):
+        '''
+        Close the client.
+        '''
+        self.websocket.close()
+
     def send_per_data(self,
             text: str = '',
             audio: bytes = b'',
diff --git a/takway/clients/web_socket_client_utils.py b/takway/clients/web_socket_client_utils.py
index c25e7fe..e68c206 100644
--- a/takway/clients/web_socket_client_utils.py
+++ b/takway/clients/web_socket_client_utils.py
@@ -39,7 +39,7 @@ class WebSocketClinet:
         self.board = board
         # server_args
         self.server_args = server_args
-        # recorder_args
+        # self.recorder_args
         self.recorder_args = recorder_args
         # player_args
         self.player_args = player_args
@@ -57,9 +57,15 @@
         self.audio_play_queue = manager.Queue()
         self.excute_queue = manager.Queue()
 
-        # multiprocessing flags
-        self.mircophone_active_set = manager.Event()
-        self.speaker_active_set = manager.Event()
+        # wake-up event
+        self.wakeup_event = manager.Event()
+        # interrupt event
+        self.interrupt_event = manager.Event()
+
+        # listening event
+        self.listening_event = manager.Event()
+        # playback event
+        self.speaking_event = manager.Event()
 
         processes = [
             multiprocessing.Process(target=self.audio_process),
@@ -73,6 +79,7 @@
 
         for process in processes:
             time.sleep(0.5)
+            # time.sleep(2)
             process.start()
         for process in processes:
             process.join()
@@ -89,6 +96,7 @@
         # TODO:
         press_type = self.recorder_args.pop('press_type')
         max_slience_time = self.recorder_args.pop('max_slience_time')
+        board = self.recorder_args.pop('board', None)
         if voice_trigger:
             recorder = PicovoiceRecorder(**self.recorder_args)
         else:
@@ -100,14 +108,73 @@
         # TODO:
         recorder.press_type = press_type
         recorder.max_slience_time = max_slience_time
+        recorder.board = board
+
+        # self.hardware = recorder.hardware
+        # self.long_power_status = recorder.hardware.long_power_status
+
+        print(f"recorder.min_act_time: {recorder.min_act_time}")
+        print(f"recorder.max_slience_time: {recorder.max_slience_time}")
 
         print("Audio Process started.")
-        print("Waiting for wake up...")
 
-        # recorder.hardware.set_led_on("green")
+        # create threads
+        threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
+        if voice_trigger:
+            voice_threads = [
+                threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
+            ]
+            threads.extend(voice_threads)
+        for thread in threads:
+            thread.start()
+        print("Audio Process started.")
+
+        while True:
+            for thread in threads:
+                thread.join()
+            print(f"audio process exit") ; exit()
+
+
+    def hardware_trigger_thread(self, recorder):
+        """hardware_trigger_thread
+
+        Args:
+            recorder: takway.audio_utils.Recorder, recorder object
+        """
+        print("Hardware trigger thread started.")
+        last_short_power_status = False
+        while True:
+            # the power (long-press) toggle is on
+            if recorder.hardware.long_power_status:
+                if not self.wakeup_event.is_set():
+                    self.wakeup_event.set()
+                # short press detected: interrupt the conversation
+                try:
+                    if recorder.hardware.short_power_status and not last_short_power_status:
+                        self.interrupt_event.set()
+                        recorder.hardware.short_power_status = False
+                        print("Interrupt conversation.")
+                        last_short_power_status = ~recorder.hardware.short_power_status
+                    else:
+                        last_short_power_status = recorder.hardware.short_power_status
+
+                except:
+                    pass
+            else:
+                self.wakeup_event.clear()
+            time.sleep(0.1)
+
+    def voice_trigger_thread(self, recorder):
+        """voice_trigger_thread
+
+        Args:
+            recorder: takway.audio_utils.Recorder, recorder object
+        """
+        board = recorder.board
+        print("Waiting for wake up...")
+        if board == 'orangepi':
+            recorder.hardware.set_led_on("green")
         while True:
-            if self.shared_waiting:
-                continue
             data = recorder.record_chunk_voice(
                 CHUNK=recorder.porcupine.frame_length,
                 return_type=None,
                 exception_on_overflow=False)
 
             record_chunk_size = recorder.vad_chunk_size
 
+
             # woken up by the power button or by the wake word
-            if recorder.hardware.power_status or recorder.is_wakeup(data):
-                # recorder.hardware.set_led_on("blue")
-                pass
+            if self.wakeup_event.is_set():
+                print(f"{datetime.now()}: Wake up by button.")
             else:
-                continue
+                if recorder.is_wakeup(data):
+                    self.wakeup_event.set()
+                    recorder.hardware.long_power_status = True
+                    print(f"{datetime.now()}: wake up by voice.")
+                else:
+                    continue
+
+            # switch to listening mode
+            self.listening_event.set()
+            self.speaking_event.clear()
+
+            if board == 'orangepi':
+                recorder.hardware.set_led_on("blue")
+
             # wake up
             is_bgn = True
             is_end = False
@@ -132,51 +212,59 @@
             print("Start recording...")
             # prepare the conversation state
             while True:
-                # voice activity detection
-                data = recorder.record_chunk_voice(
-                    CHUNK=record_chunk_size,
-                    return_type=None,
-                    exception_on_overflow=False)
-                is_speech = recorder.is_speech(data)
-
-                # check the state
-                if is_speech:
-                    print("valid voice")
-                    slience_bgn_t = time.time()
-                    frames.append(data)
-                else:
-                    slience_time = time.time() - slience_bgn_t
-
-                    # long silence ends the wake-up state: if nothing is said for a while after wake-up, or the power toggle is switched off, the conversation is considered finished
-                    if slience_time > recorder.max_slience_time or not recorder.hardware.power_status:
+                if not self.wakeup_event.is_set():
                         break
-                    # short silence ends a single utterance: once the silence exceeds a threshold (about 0.5 s), send the data
-                    if slience_time > recorder.min_act_time:
-                        is_end = True
-                        is_bgn = False
-
-                if not is_speech:
-                    continue
-
-                # send the data as a stream
-                stream_reset_status = self.stream_record_process(
-                    bytes_frames=recorder.write_wave_bytes(frames),
-                    frames_size=len(frames),
-                    record_chunk_size=record_chunk_size,
-                    sample_rate=recorder.RATE,
-                    min_stream_record_time=recorder.min_stream_record_time,
-                    is_bgn=is_bgn,
-                    is_end=is_end)
-
-                if stream_reset_status:
-                    frames.clear()
-                    is_bgn = False
-
-            # print(f"Total frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
-            # print("End recording.")
-            # recorder.hardware.set_led_off("blue")
-
+                if self.listening_event.is_set():
+                    # voice activity detection
+                    data = recorder.record_chunk_voice(
+                        CHUNK=record_chunk_size,
+                        return_type=None,
+                        exception_on_overflow=False)
+                    is_speech = recorder.is_speech(data)
+
+                    # check the state
+                    if is_speech:
+                        print(f"{datetime.now()}: valid voice")
+                        slience_bgn_t = time.time()
+                        frames.append(data)
+
+                    slience_time = time.time() - slience_bgn_t
+
+                    # long silence ends the wake-up state: if nothing is said for a while after wake-up, or the power toggle is switched off, the conversation is considered finished
+                    if slience_time > recorder.max_slience_time:
+                        recorder.hardware.long_power_status = False
+                        break
+
+                    # short silence ends a single utterance: once the silence exceeds a threshold (about 0.5 s), send the data
+                    if slience_time > recorder.min_act_time:
+                        is_end = True
+                        is_bgn = False
+
+                    print(f"{datetime.now()}: slience_time: {slience_time}")
+
+
+                    # send the data as a stream
+                    stream_reset_status = self.stream_record_process(
+                        bytes_frames=recorder.write_wave_bytes(frames),
+                        frames_size=len(frames),
+                        record_chunk_size=record_chunk_size,
+                        sample_rate=recorder.RATE,
+                        min_stream_record_time=recorder.min_stream_record_time,
+                        is_bgn=is_bgn,
+                        is_end=is_end)
+
+                    if stream_reset_status:
+                        frames.clear()
+                        is_bgn = False
+
+                    # print(f"Total frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
+                    # print("End recording.")
+                    if board == 'orangepi':
+                        recorder.hardware.set_led_off("blue")
+                elif self.speaking_event.is_set():
+                    print(f"{datetime.now()}: wait for speaking.")
+                    self.listening_event.wait()
 
     def stream_record_process(self,
             bytes_frames: bytes,
@@ -225,13 +313,29 @@
         while True:
             if self.client_queue.empty():
                 continue
-
-            # print(f"init skt time: {datetime.now()}")
+            print(f"web socket: {self.wakeup_event.is_set()}, {self.interrupt_event.is_set()}, {self.listening_event.is_set()}")
+            print(f"{datetime.now()}: start setup web socket connection.")
             # wake up
-            client.wakeup_client()
+            if self.wakeup_event.is_set() and not self.interrupt_event.is_set():
+                client.wakeup_client()
+                clear_queue_flag = False
+            else:
+                print(f"not wake up, skip setup web socket connection.")
+                self.client_queue.get(block=False)
+                continue
 
             # send data
             for queue_data in QueueIterator(self.client_queue):
+
+                # exit the loop when the wake-up state is cleared or when the conversation is interrupted
+                if not self.wakeup_event.is_set() or self.interrupt_event.is_set():
+                    clear_queue_flag = True
+
+                if clear_queue_flag:
+                    client.close_client()
+                    break
+
+                # send audio data
                 if queue_data[0] == 'audio':
                     audio_dict = queue_data[1]
@@ -242,30 +346,31 @@
                         is_end=audio_dict['is_end'],
                         encoding='base64',
                     )
-            # print(f"send skt time: {datetime.now()}")
 
-            # print(f"fnsh skt time: {datetime.now()}")
+            if clear_queue_flag:
+                continue
+
+            # wait for playback
+            self.listening_event.clear()
+
             # receive data
             while True:
                 response, data_type = client.receive_per_data()
                 if data_type == dict:
-                    print(response) # print the received message
-                    '''
-                    try:
-                        response = json.loads(response['msg'])
-                        if 'content' in response.keys():
-                            self.excute_queue.put((response['instruct'], response['content']))
-                    except json.JSONDecodeError as e:
-                        print(f"json decode error: {e}")
+                    print(f"{datetime.now()}: receive json data: {response}") # print the received message
+                    # misrecognized (false trigger) audio
+                    if response['code'] == 201:
+                        print(f"false audio: wakeup: {self.wakeup_event.is_set()}, interrupt: {self.interrupt_event.is_set()}, listening: {self.listening_event.is_set()}")
                         continue
-                    # print(f"recv json time: {datetime.now()}")
-                    '''
                 elif data_type == bytes:
-                    # print(f"recv bytes time: {datetime.now()}")
+                    # start playback
+                    self.speaking_event.set()
+                    self.listening_event.clear()
+                    print(f"{datetime.now()}: receive audio data")
                     self.audio_play_queue.put(('audio_bytes', response))
                 elif data_type == None:
+                    print(f"{datetime.now()}: receive None data, break loop.")
                     break # exit the loop if no message was received
-            # print("Receiving finished:", datetime.now())
 
 
     def audio_play_process(self):
@@ -276,33 +381,61 @@
         '''
         audio_player = AudioPlayer(**self.player_args)
         print("Audio play process started.")
+        audio_list = []
         while True:
             item = self.audio_play_queue.get()
+
+            # wake-up state
+            if not self.wakeup_event.is_set():
+                continue
+
+            # interrupt state
+            if self.interrupt_event.is_set():
+                self.listening_event.set()
+                self.speaking_event.clear()
+
+            # playback-stopped state
+            if not self.speaking_event.is_set():
+                assert self.listening_event.is_set(), f"speaking event is not set, got wakeup_event [{self.wakeup_event.is_set()}], interrupt_event [{self.interrupt_event.is_set()}], listening_event [{self.listening_event.is_set()}]"
+                continue
+
             # play audio
-            print("Playing audio...")
-            tts_audio = item[1]
-            print(f"tts_audio len: {len(tts_audio)}")
-            print(f"play audio time: {datetime.now()}")
+            audio_list.append(item[1])
+            print(f"{datetime.now()}: start playing audio.")
             try:
-                # play
-                self.speaker_active_set.set()
+
+                tts_audio = audio_list[0] # take the first audio chunk
                 tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
                 for i in range(0, len(tts_audio), audio_player.CHUNK):
                     audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
                     # print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
-                    if self.mircophone_active_set.is_set():
-                        print("microphone is active.")
-                        self.mircophone_active_set.wait()
+
+                    # a button press interrupts playback
+                    if self.interrupt_event.is_set():
+                        self.listening_event.set()
+                        self.speaking_event.clear()
                         break
+                    # wake-up switched off
+                    if not self.wakeup_event.is_set():
+                        self.listening_event.clear()
+                        self.speaking_event.clear()
+                        break
+
+                if not self.speaking_event.is_set():
+                    print(f"{datetime.now()}: speaker interrupt.")
+                    audio_list = [] # clear the audio list
+                    continue
+
                 # play the last audio segment
                 audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
-                print(f"audio data played.")
+                audio_list.pop(0) # pop the first audio chunk
+                print(f"{datetime.now()}: audio data played.")
             except TypeError as e:
                 print(f"audio play error: {e}")
                 continue
+
+            # switch back to the listening state
+            self.listening_event.set()
 
-        # audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
-        # print(f"{item[0]} data played.")
-
diff --git a/tools/keyboard_io.py b/tools/keyboard_io.py
new file mode 100644
index 0000000..457f2b4
--- /dev/null
+++ b/tools/keyboard_io.py
@@ -0,0 +1,6 @@
+from takway.board.keyboard import Keyboard
+
+if __name__ == '__main__':
+    kb = Keyboard()
+    while True:
+        pass
\ No newline at end of file
diff --git a/ws_client.py b/ws_client.py
index adb93d6..b4189d7 100644
--- a/ws_client.py
+++ b/ws_client.py
@@ -15,7 +15,7 @@ if __name__ == '__main__':
     # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
    # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # duck
    # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # sleep v2
-    session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # new
+    session_id = '469f4a99-12a5-45a6-bc91-353df07423b6' # new
 
     excute_args = {}
 
@@ -115,6 +115,7 @@ if __name__ == '__main__':
     parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
     parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
     parser.add_argument('--max_slience_time', type=int, default=30, help='Max silent time when recording, sec')
+    parser.add_argument('--min_act_time', type=float, default=0.5, help='Min inactive time, sec') # seconds of silence to wait before sending the audio
     parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index')
 
     # player parameters
@@ -155,6 +156,7 @@ if __name__ == '__main__':
         'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
         'CHANNELS': args.IN_CHANNELS,
         'RATE': args.IN_RATE,
+        'min_act_time': args.min_act_time,
         'max_slience_time': args.max_slience_time,
         'min_stream_record_time': args.min_stream_record_time,
         'input_device_index': args.mircophone_device,