From 2a298fb5e2e1bffac5b60bead354ebbb9a06d5c3 Mon Sep 17 00:00:00 2001 From: IrvingGao <1729854488@qq.com> Date: Thu, 23 May 2024 20:00:40 +0800 Subject: [PATCH] [bug] board --- takway/clients/client_utils.py | 30 ++++++- takway/clients/web_socket_client_utils.py | 99 +++++++++++++---------- tools/easy_client.py | 30 +++++++ 3 files changed, 113 insertions(+), 46 deletions(-) create mode 100644 tools/easy_client.py diff --git a/takway/clients/client_utils.py b/takway/clients/client_utils.py index 4ba1893..61c6a0a 100644 --- a/takway/clients/client_utils.py +++ b/takway/clients/client_utils.py @@ -3,7 +3,7 @@ import json import time import datetime import requests - +import struct from takway.common_utils import encode_bytes2str, decode_str2bytes ''' @@ -154,8 +154,10 @@ class BaseWebSocketClient: recv_data = self.websocket.recv() except websocket._exceptions.WebSocketConnectionClosedException: return None, None + ''' try: recv_data = json.loads(recv_data) + #解析头信息,假设头信息前 8 个字节包含两个长度字段 except json.JSONDecodeError as e: # print(f"JSONDecodeError: {e}") # is_end = True @@ -163,4 +165,28 @@ class BaseWebSocketClient: except Exception as e: # print(f"receive_per_data error: {e}") assert isinstance(recv_data, bytes), ValueError(f"Received data is not bytes, got {type(recv_data)}.") - return recv_data, type(recv_data) \ No newline at end of file + return recv_data, type(recv_data) + ''' + + try: + #解析头信息,假设头信息前 8 个字节包含两个长度字段 + header = recv_data[:8] + text_length, audio_length = struct.unpack('!II', header) + + #提取文本和二进制音频数据 + text_bytes = recv_data[8:8 + text_length] + audio = recv_data[8 + text_length:8 + text_length + audio_length] + text = text_bytes.decode('utf-8') + + print("Received text:",text) + #处理音频数据,例如播放音频 + print("Received audio(length):",len(audio)) + return [text, audio], list + except TypeError as e: + try: + recv_data = json.loads(recv_data) + print(f"json: {recv_data}") + return recv_data, type(recv_data) + except json.JSONDecodeError as e: + pass + \ 
No newline at end of file diff --git a/takway/clients/web_socket_client_utils.py b/takway/clients/web_socket_client_utils.py index c02dcd4..0cf0a6d 100644 --- a/takway/clients/web_socket_client_utils.py +++ b/takway/clients/web_socket_client_utils.py @@ -398,19 +398,24 @@ class WebSocketClinet: self.speaking_event.clear() break elif response['code'] == 200: + self.audio_play_queue.put(('audio_json', response)) # 切换播放模式 self.listening_event.clear() self.speaking_event.set() if response['type'] == 'close': break + elif data_type == list: + # ####################### 更新后的代码 # + print(f"{datetime.now()}: receive audio_list") + self.audio_play_queue.put(('audio_list', response)) + elif data_type == bytes: # 开始播放 - print(f"{datetime.now()}: receive audio data") - print(f"listening_event: {self.listening_event.is_set()}, speaking_event: {self.speaking_event.is_set()}") + # print(f"{datetime.now()}: receive audio data") self.audio_play_queue.put(('audio_bytes', response)) elif data_type == None: print(f"{datetime.now()}: receive None data, break loop.") - print(f"listening_event: {self.listening_event.is_set()}, speaking_event: {self.speaking_event.is_set()}") + # print(f"listening_event: {self.listening_event.is_set()}, speaking_event: {self.speaking_event.is_set()}") break # 如果没有接收到消息,则退出循环 @@ -430,51 +435,57 @@ class WebSocketClinet: if not self.wakeup_event.is_set(): continue - audio_list.append(item[1]) - print(f"{datetime.now()}: start playing audio.") - - - if self.listening_event.is_set(): - continue - - if not self.speaking_event.is_set(): - continue - - # 播放音频 - try: + if item[0] == 'audio_list': + # TODO: 判断bytes是否是最后一个,如果是最后一个,则播放完毕,切换监听模式 + audio_info, audio_data = item[1] + data_type = audio_info['type'] - tts_audio = audio_list[0] # 取出第一个音频 - tts_audio = audio_player.check_audio_type(tts_audio, return_type=None) - for i in range(0, len(tts_audio), audio_player.CHUNK): - audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK]) - print("Playing {} 
data...{}/{}".format(item[0], i, len(tts_audio))) - - # 按键打断播放 - if self.listening_event.is_set() and not self.speaking_event.is_set(): - print(f"{datetime.now()}: speaker interrupt.") - break - # 关闭状态 - if not self.wakeup_event.is_set(): - print(f"{datetime.now()}: speaker close.") - self.listening_event.clear() - self.speaking_event.clear() - break + print(f"{datetime.now()}: start playing audio.") - if not self.speaking_event: - audio_list = [] # 清空音频列表 + if self.listening_event.is_set(): continue - # 播放最后一段音频 - audio_player.stream.write(tts_audio[i+audio_player.CHUNK:]) - audio_list.pop(0) # 弹出第一个音频 - print(f"{datetime.now()}: audio data played.") - except TypeError as e: - print(f"audio play error: {e}") - continue + + if not self.speaking_event.is_set(): + continue + + audio_list.append(audio_data) + + # 播放音频 + try: + tts_audio = audio_list[0] # 取出第一个音频 + tts_audio = audio_player.check_audio_type(tts_audio, return_type=None) + for i in range(0, len(tts_audio), audio_player.CHUNK): + audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK]) + print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio))) + + # 按键打断播放 + if self.listening_event.is_set() and not self.speaking_event.is_set(): + print(f"{datetime.now()}: speaker interrupt.") + break + # 关闭状态 + if not self.wakeup_event.is_set(): + print(f"{datetime.now()}: speaker close.") + self.listening_event.clear() + self.speaking_event.clear() + break + + if not self.speaking_event: + audio_list = [] # 清空音频列表 + continue + # 播放最后一段音频 + audio_player.stream.write(tts_audio[i+audio_player.CHUNK:]) + audio_list.pop(0) # 弹出第一个音频 + print(f"{datetime.now()}: audio data played.") + except TypeError as e: + print(f"audio play error: {e}") + continue + + if self.wakeup_event.is_set(): + if data_type == 'end': + # 启动监听状态 + self.speaking_event.clear() + self.listening_event.set() - if self.wakeup_event.is_set(): - # 启动监听状态 - self.speaking_event.clear() - self.listening_event.set() diff --git a/tools/easy_client.py 
b/tools/easy_client.py
new file mode 100644
index 0000000..17c9a77
--- /dev/null
+++ b/tools/easy_client.py
@@ -0,0 +1,47 @@
+"""Minimal manual test client: send one WAV file and print streamed replies."""
+from takway.clients.client_utils import BaseWebSocketClient
+
+from takway.audio_utils import AudioPlayer
+
+
+if __name__ == '__main__':
+    server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary'
+
+    session_id = '469f4a99-12a5-45a6-bc91-353df07423b6'  # new session id
+    client = BaseWebSocketClient(server_url, session_id)
+
+    client.wakeup_client()
+
+    wave_file = 'example_recording.wav'
+
+    player = AudioPlayer()
+
+    frames = player.load_audio_file(wave_file)
+
+    # Send the whole recording as a single, final chunk.
+    client.send_per_data(audio=frames, voice_synthesize=True, is_end=True, encoding='base64')
+
+    while True:
+        try:
+            result = client.receive_per_data()
+        except Exception as e:
+            # Report instead of silently swallowing; `except Exception`
+            # (unlike a bare `except`) still lets Ctrl+C stop the script.
+            print(f"receive error: {e}")
+            continue
+
+        if result is None:
+            # Frame was neither a length-prefixed packet nor valid JSON.
+            continue
+
+        response, data_type = result
+        if data_type is None:
+            # receive_per_data reports a closed connection as (None, None).
+            break
+
+        if data_type is list:
+            # Binary packet decoded into [text, audio_bytes].
+            text, audio = response
+            print(text, len(audio))
+        else:
+            print(response)