新交互逻辑,长按/短按

This commit is contained in:
IrvingGao 2024-05-23 16:07:23 +08:00
parent 8255cc8c68
commit 346559fb90
7 changed files with 281 additions and 118 deletions

View File

@ -396,7 +396,7 @@ except:
webrtcvad_available = False webrtcvad_available = False
class VADRecorder(HDRecorder): class VADRecorder(HDRecorder):
def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1,**kwargs): def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=0.3,**kwargs):
super().__init__(**kwargs) super().__init__(**kwargs)
if webrtcvad_available: if webrtcvad_available:
self.vad = webrtcvad.Vad(vad_sensitivity) self.vad = webrtcvad.Vad(vad_sensitivity)

View File

@ -12,7 +12,7 @@ class Keyboard(BaseHardware):
self.keyboard_key = keyboard_key self.keyboard_key = keyboard_key
self.init_hd_thread() self.init_hd_thread()
self.power_status = False # 单次触发按键状态 self.long_power_status = False # 单次触发按键状态
def hd_detection_loop(self): def hd_detection_loop(self):
keyboard_status = False keyboard_status = False
@ -27,10 +27,12 @@ class Keyboard(BaseHardware):
time.sleep(0.001) time.sleep(0.001)
if not self.shared_hd_status and last_status: if not self.shared_hd_status and last_status:
self.power_status = ~self.power_status self.long_power_status = ~self.long_power_status
if self.power_status: if self.long_power_status:
print("Chating mode.") print("Chating mode.")
else: else:
print("Slience mode.") print("Slience mode.")
print(f"pres time: {datetime.datetime.now()}") print(f"pres time: {datetime.datetime.now()}")
last_status = self.shared_hd_status last_status = self.shared_hd_status

View File

@ -7,7 +7,6 @@ try:
except: except:
pass pass
''' '''
| GPIO | LED | | GPIO | LED |
| -- | - - | | -- | - - |
@ -17,7 +16,7 @@ except:
| 3 | 蓝色 | | 3 | 蓝色 |
| 4 | 白色 | | 4 | 白色 |
| GPIO | BUTTOM | | GPIO | BUTTON |
| -- | ---- | | -- | ---- |
| 6 | 按键1 | | 6 | 按键1 |
| 8 | 按键2 | | 8 | 按键2 |
@ -40,6 +39,8 @@ class OrangePi(BaseHardware):
self.led_set_status_2 = False self.led_set_status_2 = False
self.power_status = False # 单次触发按键状态 self.power_status = False # 单次触发按键状态
self.long_power_status = False
self.short_power_status = False
self.button_init() self.button_init()
self.init_hd_thread() self.init_hd_thread()
@ -47,40 +48,53 @@ class OrangePi(BaseHardware):
def button_init(self): def button_init(self):
wiringpi.wiringPiSetup() wiringpi.wiringPiSetup()
# GPIO 输出模式 # GPIO 输出模式
wiringpi.pinMode(self.LED_PIN_red,GPIO.OUTPUT) wiringpi.pinMode(self.LED_PIN_red, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_yellow,GPIO.OUTPUT) wiringpi.pinMode(self.LED_PIN_yellow, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_green,GPIO.OUTPUT) wiringpi.pinMode(self.LED_PIN_green, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_blue,GPIO.OUTPUT) wiringpi.pinMode(self.LED_PIN_blue, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_white,GPIO.OUTPUT) wiringpi.pinMode(self.LED_PIN_white, GPIO.OUTPUT)
# GPIO 输入模式 # GPIO 输入模式
wiringpi.pinMode(self.BUTTON_PIN_1,GPIO.INPUT) wiringpi.pinMode(self.BUTTON_PIN_1, GPIO.INPUT)
wiringpi.pinMode(self.BUTTON_PIN_2,GPIO.INPUT) wiringpi.pinMode(self.BUTTON_PIN_2, GPIO.INPUT)
def init_hd_thread(self): def init_hd_thread(self):
# hd_threads = [threading.Thread(target=self.hd_detection_loop),
# threading.Thread(target=self.hd_detection_loop_2)]
hd_threads = [threading.Thread(target=self.hd_detection_loop)] hd_threads = [threading.Thread(target=self.hd_detection_loop)]
for hd_thread in hd_threads: for hd_thread in hd_threads:
hd_thread.start() hd_thread.start()
def hd_detection_loop(self): def hd_detection_loop(self):
keyboard_status = False last_status_1 = False
last_status = False press_time_1 = None
while True:
self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1)
if self.button_status:
wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW)
else:
wiringpi.digitalWrite(self.LED_PIN_red,GPIO.HIGH)
if not self.button_status and last_status: while True:
self.power_status = ~self.power_status button_status_1 = wiringpi.digitalRead(self.BUTTON_PIN_1)
if self.power_status: if not button_status_1 and last_status_1:
print("Chating mode.") press_time_1 = datetime.datetime.now()
else: elif button_status_1 and not last_status_1:
print("Slience mode.") if press_time_1:
print(f"pres time: {datetime.datetime.now()}") press_duration = (datetime.datetime.now() - press_time_1).total_seconds()
last_status = self.button_status print(f"press_duration: {press_duration}")
if press_duration > 1:
self.long_power_status = ~self.long_power_status
print(f"long_power_status: {self.long_power_status}")
else:
self.short_power_status = ~self.short_power_status
print(f"short_power_status: {self.short_power_status}")
press_time_1 = None
last_status_1 = button_status_1
# 更新LED状态
if self.long_power_status:
self.set_led_on('white')
else:
self.short_power_status = False
self.set_led_off('white')
if self.short_power_status:
self.set_led_on('yellow')
else:
self.set_led_off('yellow')
def set_led_on(self, color='red'): def set_led_on(self, color='red'):
wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH) wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH)

View File

@ -117,6 +117,12 @@ class BaseWebSocketClient:
''' '''
self.websocket = create_connection(self.server_url) self.websocket = create_connection(self.server_url)
def close_client(self):
'''
Close the client.
'''
self.websocket.close()
def send_per_data(self, def send_per_data(self,
text: str = '', text: str = '',
audio: bytes = b'', audio: bytes = b'',

View File

@ -39,7 +39,7 @@ class WebSocketClinet:
self.board = board self.board = board
# server_args # server_args
self.server_args = server_args self.server_args = server_args
# recorder_args # self.recorder_args
self.recorder_args = recorder_args self.recorder_args = recorder_args
# player_args # player_args
self.player_args = player_args self.player_args = player_args
@ -57,9 +57,15 @@ class WebSocketClinet:
self.audio_play_queue = manager.Queue() self.audio_play_queue = manager.Queue()
self.excute_queue = manager.Queue() self.excute_queue = manager.Queue()
# 多进程标志为 # 唤醒事件
self.mircophone_active_set = manager.Event() self.wakeup_event = manager.Event()
self.speaker_active_set = manager.Event() # 打断事件
self.interrupt_event = manager.Event()
# 监听事件
self.listening_event = manager.Event()
# 播放事件
self.speaking_event = manager.Event()
processes = [ processes = [
multiprocessing.Process(target=self.audio_process), multiprocessing.Process(target=self.audio_process),
@ -73,6 +79,7 @@ class WebSocketClinet:
for process in processes: for process in processes:
time.sleep(0.5) time.sleep(0.5)
# time.sleep(2)
process.start() process.start()
for process in processes: for process in processes:
process.join() process.join()
@ -89,6 +96,7 @@ class WebSocketClinet:
# TODO: # TODO:
press_type = self.recorder_args.pop('press_type') press_type = self.recorder_args.pop('press_type')
max_slience_time = self.recorder_args.pop('max_slience_time') max_slience_time = self.recorder_args.pop('max_slience_time')
board = self.recorder_args.pop('board', None)
if voice_trigger: if voice_trigger:
recorder = PicovoiceRecorder(**self.recorder_args) recorder = PicovoiceRecorder(**self.recorder_args)
else: else:
@ -100,14 +108,73 @@ class WebSocketClinet:
# TODO: # TODO:
recorder.press_type = press_type recorder.press_type = press_type
recorder.max_slience_time = max_slience_time recorder.max_slience_time = max_slience_time
recorder.board = board
# self.hardware = recorder.hardware
# self.long_power_status = recorder.hardware.long_power_status
print(f"recorder.min_act_time: {recorder.min_act_time}")
print(f"recorder.max_slience_time: {recorder.max_slience_time}")
print("Audio Process started.") print("Audio Process started.")
print("Waiting for wake up...") # create threads
# recorder.hardware.set_led_on("green") threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
if voice_trigger:
vioce_threads = [
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
]
threads.extend(vioce_threads)
for thread in threads:
thread.start()
print("Audio Process started.")
while True:
for thread in threads:
thread.join()
print(f"audio process exit") ; exit()
def hardware_trigger_thread(self, recorder):
"""hardware_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
print("Hardware trigger thread started.")
last_short_power_status = False
while True:
# 开关按键被按下
if recorder.hardware.long_power_status:
if not self.wakeup_event.is_set():
self.wakeup_event.set()
# 短时按键被按下,打断
try:
if recorder.hardware.short_power_status and not last_short_power_status:
self.interrupt_event.set()
recorder.hardware.short_power_status = False
print("Interrupt conversation.")
last_short_power_status = ~recorder.hardware.short_power_status
else:
last_short_power_status = recorder.hardware.short_power_status
except:
pass
else:
self.wakeup_event.clear()
time.sleep(0.1)
def voice_trigger_thread(self, recorder):
"""voice_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
board = recorder.board
print("Waiting for wake up...")
if board == 'orangepi':
recorder.hardware.set_led_on("green")
while True: while True:
if self.shared_waiting:
continue
data = recorder.record_chunk_voice( data = recorder.record_chunk_voice(
CHUNK=recorder.porcupine.frame_length, CHUNK=recorder.porcupine.frame_length,
return_type=None, return_type=None,
@ -116,12 +183,25 @@ class WebSocketClinet:
record_chunk_size = recorder.vad_chunk_size record_chunk_size = recorder.vad_chunk_size
# 开关按键被按下或被关键词唤醒 # 开关按键被按下或被关键词唤醒
if recorder.hardware.power_status or recorder.is_wakeup(data): if self.wakeup_event.is_set():
# recorder.hardware.set_led_on("blue") print(f"{datetime.now()}: Wake up by button.")
pass
else: else:
continue if recorder.is_wakeup(data):
self.wakeup_event.set()
recorder.hardware.long_power_status = True
print(f"{datetime.now()}: wake up by voice.")
else:
continue
# 设置为倾听模式
self.listening_event.set()
self.speaking_event.clear()
if board == 'orangepi':
recorder.hardware.set_led_on("blue")
# wake up # wake up
is_bgn = True is_bgn = True
is_end = False is_end = False
@ -132,51 +212,59 @@ class WebSocketClinet:
print("Start recording...") print("Start recording...")
# 准备对话状态 # 准备对话状态
while True: while True:
# 语音活动检测 if not self.wakeup_event.is_set():
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
is_speech = recorder.is_speech(data)
# 判断状态
if is_speech:
print("valid voice")
slience_bgn_t = time.time()
frames.append(data)
else:
slience_time = time.time() - slience_bgn_t
# 长时沉默关闭唤醒状态:如果唤醒后超过一定时间没有说话/关闭按键被按下,则认为是结束
if slience_time > recorder.max_slience_time or not recorder.hardware.power_status:
break break
# 短时沉默结束单次对话沉默时间超过一定时间段0.5s左右),则发送数据 if self.listening_event.is_set():
if slience_time > recorder.min_act_time: # 语音活动检测
is_end = True data = recorder.record_chunk_voice(
is_bgn = False CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
is_speech = recorder.is_speech(data)
if not is_speech: # 判断状态
continue if is_speech:
print(f"{datetime.now()}: valid voice")
slience_bgn_t = time.time()
frames.append(data)
# 流式发送数据 slience_time = time.time() - slience_bgn_t
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=is_end)
if stream_reset_status: # 长时沉默关闭唤醒状态:如果唤醒后超过一定时间没有说话/关闭按键被按下,则认为是结束
frames.clear() if slience_time > recorder.max_slience_time:
is_bgn = False recorder.hardware.long_power_status = False
break
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.") # 短时沉默结束单次对话沉默时间超过一定时间段0.5s左右),则发送数据
# print("End recording.") if slience_time > recorder.min_act_time:
# recorder.hardware.set_led_off("blue") is_end = True
is_bgn = False
print(f"{datetime.now()}: slience_time: {slience_time}")
# 流式发送数据
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=is_end)
if stream_reset_status:
frames.clear()
is_bgn = False
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
# print("End recording.")
if board == 'orangepi':
recorder.hardware.set_led_off("blue")
elif self.speaking_event.is_set():
print(f"{datetime.now()}: wait for speaking.")
self.listening_event.wait()
def stream_record_process(self, def stream_record_process(self,
bytes_frames: bytes, bytes_frames: bytes,
@ -225,13 +313,29 @@ class WebSocketClinet:
while True: while True:
if self.client_queue.empty(): if self.client_queue.empty():
continue continue
print(f"web socket: {self.wakeup_event.is_set()}, {self.interrupt_event.is_set()}, {self.listening_event.is_set()}")
# print(f"init skt time: {datetime.now()}") print(f"{datetime.now()}: start setup web socket connection.")
# 唤醒 # 唤醒
client.wakeup_client() if self.wakeup_event.is_set() and not self.interrupt_event.is_set():
client.wakeup_client()
clear_queue_flag = False
else:
print(f"not wake up, skip setup web socket connection.")
self.client_queue.get(block=False)
continue
# 发送数据 # 发送数据
for queue_data in QueueIterator(self.client_queue): for queue_data in QueueIterator(self.client_queue):
# 当唤醒状态被关闭时,退出循环; 当被打断时,退出循环
if not self.wakeup_event.is_set() or self.interrupt_event.is_set():
clear_queue_flag = True
if clear_queue_flag:
client.close_client()
break
# 发送音频数据
if queue_data[0] == 'audio': if queue_data[0] == 'audio':
audio_dict = queue_data[1] audio_dict = queue_data[1]
@ -242,30 +346,31 @@ class WebSocketClinet:
is_end=audio_dict['is_end'], is_end=audio_dict['is_end'],
encoding='base64', encoding='base64',
) )
# print(f"send skt time: {datetime.now()}")
# print(f"fnsh skt time: {datetime.now()}") if clear_queue_flag:
continue
# 等待播放
self.listening_event.clear()
# 接收数据 # 接收数据
while True: while True:
response, data_type = client.receive_per_data() response, data_type = client.receive_per_data()
if data_type == dict: if data_type == dict:
print(response) # 打印接收到的消息 print(f"{datetime.now()}: receive json data: {response}") # 打印接收到的消息
''' # 误识别情况
try: if response['code'] == 201:
response = json.loads(response['msg']) print(f"false audio: wakeup: {self.wakeup_event.is_set()}, interrupt: {self.interrupt_event.is_set()}, listening: {self.listening_event.is_set()}")
if 'content' in response.keys():
self.excute_queue.put((response['instruct'], response['content']))
except json.JSONDecodeError as e:
print(f"json decode error: {e}")
continue continue
# print(f"recv json time: {datetime.now()}")
'''
elif data_type == bytes: elif data_type == bytes:
# print(f"recv bytes time: {datetime.now()}") # 开始播放
self.speaking_event.set()
self.listening_event.clear()
print(f"{datetime.now()}: receive audio data")
self.audio_play_queue.put(('audio_bytes', response)) self.audio_play_queue.put(('audio_bytes', response))
elif data_type == None: elif data_type == None:
print(f"{datetime.now()}: receive None data, break loop.")
break # 如果没有接收到消息,则退出循环 break # 如果没有接收到消息,则退出循环
# print("接收完毕:", datetime.now())
def audio_play_process(self): def audio_play_process(self):
@ -276,32 +381,60 @@ class WebSocketClinet:
''' '''
audio_player = AudioPlayer(**self.player_args) audio_player = AudioPlayer(**self.player_args)
print("Audio play process started.") print("Audio play process started.")
audio_list = []
while True: while True:
item = self.audio_play_queue.get() item = self.audio_play_queue.get()
# 唤醒状态
if not self.wakeup_event.is_set():
continue
# 打断状态
if self.interrupt_event.is_set():
self.listening_event.set()
self.speaking_event.clear()
# 停止播放状态
if not self.speaking_event.is_set():
assert self.listening_event.is_set(), f"speaking event is not set, got wakeup_event [{self.wakeup_event.is_set()}], interrupt_event [{self.interrupt_event.is_set()}], listening_event [{self.listening_event.is_set()}]"
continue
# 播放音频 # 播放音频
print("Playing audio...") audio_list.append(item[1])
tts_audio = item[1] print(f"{datetime.now()}: start playing audio.")
print(f"tts_audio len: {len(tts_audio)}")
print(f"play audio time: {datetime.now()}")
try: try:
# 播放
self.speaker_active_set.set() tts_audio = audio_list[0] # 取出第一个音频
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None) tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
for i in range(0, len(tts_audio), audio_player.CHUNK): for i in range(0, len(tts_audio), audio_player.CHUNK):
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK]) audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
# print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio))) # print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
if self.mircophone_active_set.is_set():
print("mirophone is active.") # 按键打断播放
self.mircophone_active_set.wait() if self.interrupt_event.is_set():
self.listening_event.set()
self.speaking_event.clear()
break break
# 关闭状态
if not self.wakeup_event.is_set():
self.listening_event.clear()
self.speaking_event.clear()
break
if not self.speaking_event:
print(f"{datetime.now()}: speaker interrupt.")
audio_list = [] # 清空音频列表
continue
# 播放最后一段音频
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:]) audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
print(f"audio data played.") audio_list.pop(0) # 弹出第一个音频
print(f"{datetime.now()}: audio data played.")
except TypeError as e: except TypeError as e:
print(f"audio play error: {e}") print(f"audio play error: {e}")
continue continue
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:]) # 启动监听状态
# print(f"{item[0]} data played.") self.listening_event.set()

6
tools/keyboard_io.py Normal file
View File

@ -0,0 +1,6 @@
from takway.board.keyboard import Keyboard
if __name__ == '__main__':
kb = Keyboard()
while True:
pass

View File

@ -15,7 +15,7 @@ if __name__ == '__main__':
# session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed' # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
# session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # 鸭 # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # 鸭
# session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # 睡 v2 # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # 睡 v2
session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # 新 session_id = '469f4a99-12a5-45a6-bc91-353df07423b6' # 新
excute_args = {} excute_args = {}
@ -115,6 +115,7 @@ if __name__ == '__main__':
parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer') parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec') parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
parser.add_argument('--max_slience_time', type=int, default=30, help='Max slient time when recording, sec') parser.add_argument('--max_slience_time', type=int, default=30, help='Max slient time when recording, sec')
parser.add_argument('--min_act_time', type=float, default=0.5, help='Min inactive time, sec') # 等待多少秒沉默就发送音频
parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index') parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index')
# player paramters # player paramters
@ -155,6 +156,7 @@ if __name__ == '__main__':
'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32, 'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.IN_CHANNELS, 'CHANNELS': args.IN_CHANNELS,
'RATE': args.IN_RATE, 'RATE': args.IN_RATE,
'min_act_time': args.min_act_time,
'max_slience_time': args.max_slience_time, 'max_slience_time': args.max_slience_time,
'min_stream_record_time': args.min_stream_record_time, 'min_stream_record_time': args.min_stream_record_time,
'input_device_index': args.mircophone_device, 'input_device_index': args.mircophone_device,