New interaction logic: long-press / short-press

IrvingGao 2024-05-23 16:07:23 +08:00
parent 8255cc8c68
commit 346559fb90
7 changed files with 281 additions and 118 deletions
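What the title means in practice: a single hardware button now carries two gestures, distinguished by how long it is held. The OrangePi handler below timestamps the moment the button goes down, measures the hold on release, and toggles long_power_status when the hold exceeds one second (enter/leave chat mode) or short_power_status otherwise (interrupt the current reply). A minimal, hardware-free sketch of that classification, with a hypothetical read_button() callable standing in for wiringpi.digitalRead:

import time

LONG_PRESS_THRESHOLD_S = 1.0   # the diff toggles long_power_status when press_duration > 1

def classify_press(read_button, poll_s=0.01):
    """Wait for one press/release cycle and return 'long' or 'short'.

    read_button is a hypothetical callable returning True while the button
    is held (the real code polls an active-low GPIO pin instead).
    """
    while not read_button():            # wait for the press
        time.sleep(poll_s)
    pressed_at = time.monotonic()
    while read_button():                # wait for the release
        time.sleep(poll_s)
    held_for = time.monotonic() - pressed_at
    return 'long' if held_for > LONG_PRESS_THRESHOLD_S else 'short'

The real handlers keep the same one-second threshold but stay non-blocking, remembering the previous pin state inside their polling loops instead of blocking on edges.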

View File

@@ -396,7 +396,7 @@ except:
webrtcvad_available = False
class VADRecorder(HDRecorder):
def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1,**kwargs):
def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=0.3,**kwargs):
super().__init__(**kwargs)
if webrtcvad_available:
self.vad = webrtcvad.Vad(vad_sensitivity)
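Only the default changes in this hunk: min_act_time drops from 1 s to 0.3 s, so a shorter pause now ends an utterance. The other constructor arguments are constrained by webrtcvad itself: the aggressiveness mode passed to webrtcvad.Vad must be 0-3, and frames fed to is_speech must be 10, 20 or 30 ms of 16-bit mono PCM at 8, 16, 32 or 48 kHz, which is what frame_duration=30 corresponds to. A standalone sketch of that underlying check (the 16 kHz sample rate is chosen for illustration):

import webrtcvad

SAMPLE_RATE = 16000                               # webrtcvad accepts 8/16/32/48 kHz
FRAME_MS = 30                                     # frames must be 10, 20 or 30 ms
FRAME_BYTES = SAMPLE_RATE * FRAME_MS // 1000 * 2  # 16-bit mono samples

vad = webrtcvad.Vad(0)                            # 0 = least aggressive filtering

def frame_is_speech(frame: bytes) -> bool:
    # frame must be exactly FRAME_BYTES of little-endian int16 PCM
    return vad.is_speech(frame, SAMPLE_RATE)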

View File

@@ -12,7 +12,7 @@ class Keyboard(BaseHardware):
self.keyboard_key = keyboard_key
self.init_hd_thread()
self.power_status = False # single-press toggle state
self.long_power_status = False # long-press toggle state
def hd_detection_loop(self):
keyboard_status = False
@@ -27,10 +27,12 @@ class Keyboard(BaseHardware):
time.sleep(0.001)
if not self.shared_hd_status and last_status:
self.power_status = not self.power_status
if self.power_status:
self.long_power_status = not self.long_power_status
if self.long_power_status:
print("Chatting mode.")
else:
print("Silence mode.")
print(f"press time: {datetime.datetime.now()}")
last_status = self.shared_hd_status

View File

@@ -7,7 +7,6 @@ try:
except:
pass
'''
| GPIO | LED |
| ---- | ---- |
@@ -17,7 +16,7 @@ except:
| 3 | blue |
| 4 | white |
| GPIO | BUTTOM |
| GPIO | BUTTON |
| -- | ---- |
| 6 | Button 1 |
| 8 | Button 2 |
@@ -40,6 +39,8 @@ class OrangePi(BaseHardware):
self.led_set_status_2 = False
self.power_status = False # single-press toggle state
self.long_power_status = False
self.short_power_status = False
self.button_init()
self.init_hd_thread()
@@ -47,40 +48,53 @@ class OrangePi(BaseHardware):
def button_init(self):
wiringpi.wiringPiSetup()
# GPIO output mode
wiringpi.pinMode(self.LED_PIN_red,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_yellow,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_green,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_blue,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_white,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_red, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_yellow, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_green, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_blue, GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_white, GPIO.OUTPUT)
# GPIO input mode
wiringpi.pinMode(self.BUTTON_PIN_1,GPIO.INPUT)
wiringpi.pinMode(self.BUTTON_PIN_2,GPIO.INPUT)
wiringpi.pinMode(self.BUTTON_PIN_1, GPIO.INPUT)
wiringpi.pinMode(self.BUTTON_PIN_2, GPIO.INPUT)
def init_hd_thread(self):
# hd_threads = [threading.Thread(target=self.hd_detection_loop),
# threading.Thread(target=self.hd_detection_loop_2)]
hd_threads = [threading.Thread(target=self.hd_detection_loop)]
for hd_thread in hd_threads:
hd_thread.start()
def hd_detection_loop(self):
keyboard_status = False
last_status = False
while True:
self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1)
if self.button_status:
wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW)
else:
wiringpi.digitalWrite(self.LED_PIN_red,GPIO.HIGH)
last_status_1 = False
press_time_1 = None
if not self.button_status and last_status:
self.power_status = not self.power_status
if self.power_status:
print("Chatting mode.")
else:
print("Silence mode.")
print(f"press time: {datetime.datetime.now()}")
last_status = self.button_status
while True:
button_status_1 = wiringpi.digitalRead(self.BUTTON_PIN_1)
if not button_status_1 and last_status_1:
press_time_1 = datetime.datetime.now()
elif button_status_1 and not last_status_1:
if press_time_1:
press_duration = (datetime.datetime.now() - press_time_1).total_seconds()
print(f"press_duration: {press_duration}")
if press_duration > 1:
self.long_power_status = not self.long_power_status
print(f"long_power_status: {self.long_power_status}")
else:
self.short_power_status = not self.short_power_status
print(f"short_power_status: {self.short_power_status}")
press_time_1 = None
last_status_1 = button_status_1
# update LED status
if self.long_power_status:
self.set_led_on('white')
else:
self.short_power_status = False
self.set_led_off('white')
if self.short_power_status:
self.set_led_on('yellow')
else:
self.set_led_off('yellow')
def set_led_on(self, color='red'):
wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH)
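The detection loop above mirrors the two toggles onto LEDs: white follows long_power_status (chat mode) and yellow follows short_power_status, and whenever chat mode is off the short-press flag is forced back to False so a stale interrupt cannot linger. The same mapping, condensed, with a hypothetical set_led(color, on) callable in place of the digitalWrite helpers:

def update_leds(long_on: bool, short_on: bool, set_led) -> bool:
    """Mirror the toggle flags onto the white/yellow LEDs and return the
    (possibly reset) short-press flag, matching the loop above."""
    set_led('white', long_on)
    if not long_on:
        short_on = False        # short presses only matter while chatting
    set_led('yellow', short_on)
    return short_on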

View File

@@ -117,6 +117,12 @@ class BaseWebSocketClient:
'''
self.websocket = create_connection(self.server_url)
def close_client(self):
'''
Close the client.
'''
self.websocket.close()
def send_per_data(self,
text: str = '',
audio: bytes = b'',

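The functional addition here is close_client(), which lets the websocket process drop the connection when a turn is abandoned or interrupted. Pieced together from the calls that appear later in this commit (wakeup_client, send_per_data(..., is_end=..., encoding='base64'), receive_per_data() returning a (data, type) pair), one round trip with this client presumably looks like the sketch below; the pcm_bytes and play arguments are assumptions, not project names:

def run_round_trip(client, pcm_bytes, play):
    """One request/response cycle; client is a connected BaseWebSocketClient,
    pcm_bytes a recorded audio chunk, play a playback callable (all assumed)."""
    client.wakeup_client()                                   # announce the wake-up
    client.send_per_data(audio=pcm_bytes, is_end=True, encoding='base64')
    while True:
        response, data_type = client.receive_per_data()
        if data_type == dict:        # JSON control message (e.g. code 201 = false trigger)
            print(response)
        elif data_type == bytes:     # synthesized audio to play back
            play(response)
        else:                        # None: the server finished this turn
            break
    client.close_client()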
View File

@@ -39,7 +39,7 @@ class WebSocketClinet:
self.board = board
# server_args
self.server_args = server_args
# recorder_args
# self.recorder_args
self.recorder_args = recorder_args
# player_args
self.player_args = player_args
@@ -57,9 +57,15 @@ class WebSocketClinet:
self.audio_play_queue = manager.Queue()
self.excute_queue = manager.Queue()
# multiprocessing event flags
self.mircophone_active_set = manager.Event()
self.speaker_active_set = manager.Event()
# wake-up event
self.wakeup_event = manager.Event()
# interrupt event
self.interrupt_event = manager.Event()
# listening event
self.listening_event = manager.Event()
# speaking (playback) event
self.speaking_event = manager.Event()
processes = [
multiprocessing.Process(target=self.audio_process),
@@ -73,6 +79,7 @@ class WebSocketClinet:
for process in processes:
time.sleep(0.5)
# time.sleep(2)
process.start()
for process in processes:
process.join()
@@ -89,6 +96,7 @@ class WebSocketClinet:
# TODO:
press_type = self.recorder_args.pop('press_type')
max_slience_time = self.recorder_args.pop('max_slience_time')
board = self.recorder_args.pop('board', None)
if voice_trigger:
recorder = PicovoiceRecorder(**self.recorder_args)
else:
@@ -100,14 +108,73 @@ class WebSocketClinet:
# TODO:
recorder.press_type = press_type
recorder.max_slience_time = max_slience_time
recorder.board = board
# self.hardware = recorder.hardware
# self.long_power_status = recorder.hardware.long_power_status
print(f"recorder.min_act_time: {recorder.min_act_time}")
print(f"recorder.max_slience_time: {recorder.max_slience_time}")
print("Audio Process started.")
print("Waiting for wake up...")
# recorder.hardware.set_led_on("green")
# create threads
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
if voice_trigger:
voice_threads = [
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
]
threads.extend(voice_threads)
for thread in threads:
thread.start()
print("Audio Process started.")
while True:
for thread in threads:
thread.join()
print(f"audio process exit") ; exit()
def hardware_trigger_thread(self, recorder):
"""hardware_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
print("Hardware trigger thread started.")
last_short_power_status = False
while True:
# power button toggled on (long press)
if recorder.hardware.long_power_status:
if not self.wakeup_event.is_set():
self.wakeup_event.set()
# short press detected, interrupt
try:
if recorder.hardware.short_power_status and not last_short_power_status:
self.interrupt_event.set()
recorder.hardware.short_power_status = False
print("Interrupt conversation.")
last_short_power_status = not recorder.hardware.short_power_status
else:
last_short_power_status = recorder.hardware.short_power_status
except:
pass
else:
self.wakeup_event.clear()
time.sleep(0.1)
def voice_trigger_thread(self, recorder):
"""voice_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
board = recorder.board
print("Waiting for wake up...")
if board == 'orangepi':
recorder.hardware.set_led_on("green")
while True:
if self.shared_waiting:
continue
data = recorder.record_chunk_voice(
CHUNK=recorder.porcupine.frame_length,
return_type=None,
@@ -116,12 +183,25 @@ class WebSocketClinet:
record_chunk_size = recorder.vad_chunk_size
# woken up by the power button or by the wake word
if recorder.hardware.power_status or recorder.is_wakeup(data):
# recorder.hardware.set_led_on("blue")
pass
if self.wakeup_event.is_set():
print(f"{datetime.now()}: Wake up by button.")
else:
continue
if recorder.is_wakeup(data):
self.wakeup_event.set()
recorder.hardware.long_power_status = True
print(f"{datetime.now()}: wake up by voice.")
else:
continue
# switch to listening mode
self.listening_event.set()
self.speaking_event.clear()
if board == 'orangepi':
recorder.hardware.set_led_on("blue")
# wake up
is_bgn = True
is_end = False
@@ -132,51 +212,59 @@ class WebSocketClinet:
print("Start recording...")
# prepare the conversation state
while True:
# voice activity detection
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
is_speech = recorder.is_speech(data)
# check the speech state
if is_speech:
print("valid voice")
slience_bgn_t = time.time()
frames.append(data)
else:
slience_time = time.time() - slience_bgn_t
# long silence closes the wake-up state: if nothing is said for too long after wake-up, or the power button is switched off, treat the conversation as finished
if slience_time > recorder.max_slience_time or not recorder.hardware.power_status:
if not self.wakeup_event.is_set():
break
# short silence ends a single utterance: once the silence exceeds a threshold (about 0.5 s), send the data
if slience_time > recorder.min_act_time:
is_end = True
is_bgn = False
if self.listening_event.is_set():
# voice activity detection
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
is_speech = recorder.is_speech(data)
if not is_speech:
continue
# check the speech state
if is_speech:
print(f"{datetime.now()}: valid voice")
slience_bgn_t = time.time()
frames.append(data)
# stream the data to the server
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=is_end)
slience_time = time.time() - slience_bgn_t
if stream_reset_status:
frames.clear()
is_bgn = False
# long silence closes the wake-up state: if nothing is said for too long after wake-up, or the power button is switched off, treat the conversation as finished
if slience_time > recorder.max_slience_time:
recorder.hardware.long_power_status = False
break
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
# print("End recording.")
# recorder.hardware.set_led_off("blue")
# 短时沉默结束单次对话沉默时间超过一定时间段0.5s左右),则发送数据
if slience_time > recorder.min_act_time:
is_end = True
is_bgn = False
print(f"{datetime.now()}: slience_time: {slience_time}")
# stream the data to the server
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=is_end)
if stream_reset_status:
frames.clear()
is_bgn = False
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
# print("End recording.")
if board == 'orangepi':
recorder.hardware.set_led_off("blue")
elif self.speaking_event.is_set():
print(f"{datetime.now()}: wait for speaking.")
self.listening_event.wait()
def stream_record_process(self,
bytes_frames: bytes,
@@ -225,13 +313,29 @@ class WebSocketClinet:
while True:
if self.client_queue.empty():
continue
# print(f"init skt time: {datetime.now()}")
print(f"web socket: {self.wakeup_event.is_set()}, {self.interrupt_event.is_set()}, {self.listening_event.is_set()}")
print(f"{datetime.now()}: start setup web socket connection.")
# wake up
client.wakeup_client()
if self.wakeup_event.is_set() and not self.interrupt_event.is_set():
client.wakeup_client()
clear_queue_flag = False
else:
print(f"not wake up, skip setup web socket connection.")
self.client_queue.get(block=False)
continue
# send data
for queue_data in QueueIterator(self.client_queue):
# exit the loop when the wake-up state is cleared, or when interrupted
if not self.wakeup_event.is_set() or self.interrupt_event.is_set():
clear_queue_flag = True
if clear_queue_flag:
client.close_client()
break
# send audio data
if queue_data[0] == 'audio':
audio_dict = queue_data[1]
@@ -242,30 +346,31 @@ class WebSocketClinet:
is_end=audio_dict['is_end'],
encoding='base64',
)
# print(f"send skt time: {datetime.now()}")
# print(f"fnsh skt time: {datetime.now()}")
if clear_queue_flag:
continue
# wait for playback
self.listening_event.clear()
# receive data
while True:
response, data_type = client.receive_per_data()
if data_type == dict:
print(response) # print the received message
'''
try:
response = json.loads(response['msg'])
if 'content' in response.keys():
self.excute_queue.put((response['instruct'], response['content']))
except json.JSONDecodeError as e:
print(f"json decode error: {e}")
print(f"{datetime.now()}: receive json data: {response}") # 打印接收到的消息
# 误识别情况
if response['code'] == 201:
print(f"false audio: wakeup: {self.wakeup_event.is_set()}, interrupt: {self.interrupt_event.is_set()}, listening: {self.listening_event.is_set()}")
continue
# print(f"recv json time: {datetime.now()}")
'''
elif data_type == bytes:
# print(f"recv bytes time: {datetime.now()}")
# start playback
self.speaking_event.set()
self.listening_event.clear()
print(f"{datetime.now()}: receive audio data")
self.audio_play_queue.put(('audio_bytes', response))
elif data_type is None:
print(f"{datetime.now()}: receive None data, break loop.")
break # exit the loop if no message was received
# print("receive finished:", datetime.now())
def audio_play_process(self):
@@ -276,32 +381,60 @@ class WebSocketClinet:
'''
audio_player = AudioPlayer(**self.player_args)
print("Audio play process started.")
audio_list = []
while True:
item = self.audio_play_queue.get()
# wake-up state
if not self.wakeup_event.is_set():
continue
# interrupt state
if self.interrupt_event.is_set():
self.listening_event.set()
self.speaking_event.clear()
# playback-stopped state
if not self.speaking_event.is_set():
assert self.listening_event.is_set(), f"speaking event is not set, got wakeup_event [{self.wakeup_event.is_set()}], interrupt_event [{self.interrupt_event.is_set()}], listening_event [{self.listening_event.is_set()}]"
continue
# play audio
print("Playing audio...")
tts_audio = item[1]
print(f"tts_audio len: {len(tts_audio)}")
print(f"play audio time: {datetime.now()}")
audio_list.append(item[1])
print(f"{datetime.now()}: start playing audio.")
try:
# play
self.speaker_active_set.set()
tts_audio = audio_list[0] # take the first audio clip
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
for i in range(0, len(tts_audio), audio_player.CHUNK):
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
# print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
if self.mircophone_active_set.is_set():
print("microphone is active.")
self.mircophone_active_set.wait()
# button press interrupts playback
if self.interrupt_event.is_set():
self.listening_event.set()
self.speaking_event.clear()
break
# powered-off state
if not self.wakeup_event.is_set():
self.listening_event.clear()
self.speaking_event.clear()
break
if not self.speaking_event.is_set():
print(f"{datetime.now()}: speaker interrupt.")
audio_list = [] # clear the audio list
continue
# play the last chunk of the audio
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
print(f"audio data played.")
audio_list.pop(0) # pop the first audio clip
print(f"{datetime.now()}: audio data played.")
except TypeError as e:
print(f"audio play error: {e}")
continue
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
# print(f"{item[0]} data played.")
# switch back to the listening state
self.listening_event.set()
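Taken together, the new control flow in this file hangs off four multiprocessing.Manager().Event() flags shared by the audio, websocket and playback processes: wakeup_event (long press or wake word), interrupt_event (short press), listening_event (recording allowed) and speaking_event (playback in progress). A stripped-down sketch of that coordination pattern, with the real recorder and player replaced by illustrative stand-in process bodies:

import multiprocessing
import time

def recorder_proc(wakeup, listening, speaking):
    # stand-in for the long-press / wake-word path in audio_process
    wakeup.set()
    listening.set()
    speaking.clear()
    print("recorder: woke up, listening")

def player_proc(wakeup, interrupt, listening, speaking):
    wakeup.wait()                      # do not play until something woke us up
    speaking.set()
    listening.clear()
    for _ in range(100):               # stand-in for writing audio chunks
        if interrupt.is_set() or not wakeup.is_set():
            print("player: interrupted")
            break
        time.sleep(0.01)
    listening.set()                    # hand control back to the recorder
    speaking.clear()

if __name__ == '__main__':
    manager = multiprocessing.Manager()
    wakeup, interrupt = manager.Event(), manager.Event()
    listening, speaking = manager.Event(), manager.Event()
    procs = [
        multiprocessing.Process(target=recorder_proc, args=(wakeup, listening, speaking)),
        multiprocessing.Process(target=player_proc, args=(wakeup, interrupt, listening, speaking)),
    ]
    for p in procs:
        p.start()
    time.sleep(0.2)
    interrupt.set()                    # simulate a short press during playback
    for p in procs:
        p.join()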

tools/keyboard_io.py Normal file (+6)
View File

@@ -0,0 +1,6 @@
from takway.board.keyboard import Keyboard
if __name__ == '__main__':
kb = Keyboard()
while True:
pass

View File

@@ -15,7 +15,7 @@ if __name__ == '__main__':
# session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
# session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # duck
# session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # sleep v2
session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # new
session_id = '469f4a99-12a5-45a6-bc91-353df07423b6' # new
excute_args = {}
@@ -115,6 +115,7 @@ if __name__ == '__main__':
parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
parser.add_argument('--max_slience_time', type=int, default=30, help='Max silent time when recording, sec')
parser.add_argument('--min_act_time', type=float, default=0.5, help='Min inactive time, sec') # how many seconds of silence before the recorded audio is sent
parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index')
# player paramters
@@ -155,6 +156,7 @@ if __name__ == '__main__':
'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.IN_CHANNELS,
'RATE': args.IN_RATE,
'min_act_time': args.min_act_time,
'max_slience_time': args.max_slience_time,
'min_stream_record_time': args.min_stream_record_time,
'input_device_index': args.mircophone_device,
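These recorder arguments drive the timing logic in audio_process: min_act_time (0.5 s here, 0.3 s as the new VADRecorder default) is the short-silence threshold that ends one utterance and flushes it to the server, max_slience_time (30 s) is the long-silence threshold that drops the session out of chat mode, and min_stream_record_time batches frames between streamed sends. A detached sketch of that two-threshold loop, where next_frame and is_speech are hypothetical stand-ins for the recorder methods:

import time

def record_session(next_frame, is_speech, min_act_time=0.5, max_slience_time=30.0):
    """Illustrative silence-timeout loop mirroring audio_process."""
    silence_began = time.monotonic()
    frames = []
    while True:
        frame = next_frame()                     # one short audio chunk
        if is_speech(frame):
            silence_began = time.monotonic()     # speech resets the silence clock
            frames.append(frame)
            continue
        silence = time.monotonic() - silence_began
        if silence > max_slience_time:           # long silence: leave chat mode
            return
        if silence > min_act_time and frames:    # short silence: flush one utterance
            utterance = b''.join(frames)         # stand-in for stream_record_process(...)
            print(f"sending {len(utterance)} bytes")
            frames.clear()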