IrvingGao 2024-05-23 01:27:51 +08:00
commit 8255cc8c68
36 changed files with 4800 additions and 0 deletions

.gitignore (vendored, new file, 401 lines added)

@@ -0,0 +1,401 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore
# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates
# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs
# Mono auto generated files
mono_crash.*
# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Oo]ut/
[Ll]og/
[Ll]ogs/
# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/
# Visual Studio 2017 auto generated files
Generated\ Files/
# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*
# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml
# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c
# Benchmark Results
BenchmarkDotNet.Artifacts/
# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/
# ASP.NET Scaffolding
ScaffoldingReadMe.txt
# StyleCop
StyleCopReport.xml
# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc
# Chutzpah Test files
_Chutzpah*
# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb
# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap
# Visual Studio Trace Files
*.e2e
# TFS 2012 Local Workspace
$tf/
# Guidance Automation Toolkit
*.gpState
# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user
# TeamCity is a build add-in
_TeamCity*
# DotCover is a Code Coverage Tool
*.dotCover
# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json
# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info
# Visual Studio code coverage results
*.coverage
*.coveragexml
# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*
# MightyMoose
*.mm.*
AutoTest.Net/
# Web workbench (sass)
.sass-cache/
# Installshield output folder
[Ee]xpress/
# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html
# Click-Once directory
publish/
# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj
# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/
# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets
# Microsoft Azure Build Output
csx/
*.build.csdef
# Microsoft Azure Emulator
ecf/
rcf/
# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload
# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/
# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs
# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk
# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/
# RIA/Silverlight projects
Generated_Code/
# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak
# SQL Server files
*.mdf
*.ldf
*.ndf
# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl
# Microsoft Fakes
FakesAssemblies/
# GhostDoc plugin setting file
*.GhostDoc.xml
# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/
# Visual Studio 6 build log
*.plg
# Visual Studio 6 workspace options file
*.opt
# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw
# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions
# Paket dependency manager
.paket/paket.exe
paket-files/
# FAKE - F# Make
.fake/
# CodeRush personal settings
.cr/personal
# Python Tools for Visual Studio (PTVS)
__pycache__/
# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config
# Tabs Studio
*.tss
# Telerik's JustMock configuration file
*.jmconfig
# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs
# OpenCover UI analysis results
OpenCover/
# Azure Stream Analytics local run output
ASALocalRun/
# MSBuild Binary and Structured Log
*.binlog
# NVidia Nsight GPU debugger configuration file
*.nvuser
# MFractors (Xamarin productivity tool) working folder
.mfractor/
# Local History for Visual Studio
.localhistory/
# BeatPulse healthcheck temp database
healthchecksdb
# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/
# Ionide (cross platform F# VS Code tools) working folder
.ionide/
# Fody - auto-generated XML schema
FodyWeavers.xsd
# build
build
monotonic_align/core.c
*.o
*.so
*.dll
# data
/config.json
/*.pth
*.wav
*.mp3
/resources
/MoeGoe.spec
/dist/MoeGoe
/dist
Gao/
.idea
# custom
vits_model
model/*.pth
vosk-model-*
whisper*
temp
*.whl
*demo*
Emoji/
api-for-open-llm/models
datasets/
ChatHaruhi-Expand-118K/
!examples/example_recording.wav
music/
story

README.md (new file, 48 lines added)

@@ -0,0 +1,48 @@
## Client (`Client`) Frontend
### Hardware Interaction Frontend Service
#### Introduction
The hardware interaction frontend service is a `WebSocket`-based service used to communicate with the hardware device.
### Environment Setup
#### (1) Install system dependencies:
```
sudo apt-get update
sudo apt-get upgrade
sudo apt-get install cmake g++ gcc
sudo apt-get install python3-pip python3-dev portaudio19-dev libsndfile1
```
#### (2) Clone the project and install its dependencies:
```
# clone the project (HTTPS or SSH)
git clone https://github.com/Irvingao/takway_base.git
# or: git clone git@github.com:Irvingao/takway_base.git
cd takway_base
pip install -v -e .
```
#### (3) Install the board-side environment dependencies:
```
pip install -r requirements/board_requirements.txt
```
#### (4) Download the required model files:
- [Picovoice](https://picovoice.ai/docs/quick-start/porcupine-python/) on-device keyword wake-up model

  The keyword model currently in use is the `可莉可莉_zh_raspberry-pi_v3_0_0` version; you can create and download a replacement keyword model from the Picovoice console linked above.
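To sanity-check a downloaded keyword file outside the full client, a short standalone test with `pvporcupine` (already listed in `requirements.txt`) could look like the sketch below; the access key, the `.ppn` file name, and the Chinese model file are placeholders you must replace with your own downloads.
```
# Standalone wake-word check; access key, keyword path and model path are
# placeholders. This is not the project's own recorder class (see takway/audio_utils.py).
import struct
import pyaudio
import pvporcupine

porcupine = pvporcupine.create(
    access_key="YOUR_PICOVOICE_ACCESS_KEY",                 # placeholder
    keyword_paths=["可莉可莉_zh_raspberry-pi_v3_0_0.ppn"],   # your downloaded keyword file
    model_path="porcupine_params_zh.pv",                    # Chinese model file from Picovoice
)

pa = pyaudio.PyAudio()
stream = pa.open(rate=porcupine.sample_rate, channels=1, format=pyaudio.paInt16,
                 input=True, frames_per_buffer=porcupine.frame_length)
try:
    while True:
        pcm = struct.unpack_from("h" * porcupine.frame_length,
                                 stream.read(porcupine.frame_length))
        if porcupine.process(pcm) >= 0:
            print("keyword detected")
except KeyboardInterrupt:
    pass
finally:
    stream.close()
    pa.terminate()
    porcupine.delete()
```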
#### (5) Start the service:
```
python ws_client.py
```

board_env_setup.sh (new file, 24 lines added)

@@ -0,0 +1,24 @@
#!/bin/bash
# add the auto-start python client script to ~/.bashrc
# echo "nohup python3 /home/orangepi/takway_base/ws_client.py& " >> ~/.bashrc
# update system
sudo -S apt-get update -y << EOF
orangepi
EOF
# install dependencies
sudo apt-get install -y cmake g++ gcc
sudo apt-get install -y python3-pip python3-dev portaudio19-dev libsndfile1
# install python dependencies
# git clone https://Irvingao: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@github.com/Irvingao/takway_base.git
git clone http://43.132.157.186:3000/gaohz/TakwayBoard.git
cd TakwayBoard
# git remote set-url origin https://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@github.com/Irvingao/takway_base.git
pip install -v -e .
pip install -r requirements.txt
reboot

requirements.txt (new file, 8 lines added)

@@ -0,0 +1,8 @@
numpy
Unidecode
pyaudio
keyboard
websocket-client
pvporcupine
requests
webrtcvad

setup.py (new file, 15 lines added)

@@ -0,0 +1,15 @@
# setup.py
from setuptools import setup, find_packages
setup(
name='takway',
version='1.0',
packages=find_packages(), # automatically discover packages and sub-packages
url='https://github.com/Irvingao/takway_base',
classifiers=[
'Programming Language :: Python :: 3',
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
],
)

takway.egg-info/PKG-INFO (new file, 13 lines added)

@@ -0,0 +1,13 @@
Metadata-Version: 1.1
Name: takway
Version: 1.0
Summary: UNKNOWN
Home-page: https://github.com/Irvingao/takway_base
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent

takway.egg-info/SOURCES.txt (new file, 21 lines added)

@@ -0,0 +1,21 @@
README.md
setup.py
takway/__init__.py
takway/audio_utils.py
takway/cam_utils.py
takway/common_utils.py
takway/emo_utils.py
takway/picovoice_utils.py
takway.egg-info/PKG-INFO
takway.egg-info/SOURCES.txt
takway.egg-info/dependency_links.txt
takway.egg-info/top_level.txt
takway/board/__init__.py
takway/board/base_hd.py
takway/board/keyboard.py
takway/board/orangepi.py
takway/board/sipeed.py
takway/clients/__init__.py
takway/clients/client_utils.py
takway/clients/local_client_utils.py
takway/clients/web_socket_client_utils.py

takway.egg-info/dependency_links.txt (new file, 1 line added)

@@ -0,0 +1 @@

takway.egg-info/top_level.txt (new file, 1 line added)

@@ -0,0 +1 @@
takway

takway/__init__.py (new empty file)

takway/apps/data_struct.py (new file, 133 lines added)

@@ -0,0 +1,133 @@
import multiprocessing
import queue
from typing import Iterator
import time
import json
import requests
class QueueIterator:
def __init__(self,
queue,
timeout: int = 10):
self.queue = queue
self.timeout = timeout
def __iter__(self) -> Iterator:
return self
def __next__(self):
try:
data = self.queue.get(block=True, timeout=self.timeout)
if data is None: # None is used as the end-of-stream sentinel
# print("QueueIterator: End of data")
raise StopIteration
else:
# print("QueueIterator: Get data")
return data
except queue.Empty:
# print("QueueIterator: Queue is empty")
raise StopIteration
def producer(queue: multiprocessing.Queue):
for i in range(5): # produce 5 sample items
data = {'data': i, 'is_end': False}
queue.put(data)
time.sleep(1)
queue.put(None) # send the end-of-stream sentinel
def get_stream_data_from_queue(queue: multiprocessing.Queue):
for data in QueueIterator(queue):
print(data)
yield json.dumps({'line': data, 'is_end': False})
# simulate data-processing time
if __name__ == "__main__":
queue = multiprocessing.Queue()
# create and start the producer process
p = multiprocessing.Process(target=producer, args=(queue,))
p.start()
# consume the data in the Queue through the iterator
for data in QueueIterator(queue):
print(data)
# wait for the producer process to finish
p.join()
'''
# request body
{
"AUTH_INFO": {
"user": "", # string
"authid": "", # string
"api_keys": "", # string
"timestamp": "", # string
},
"DATA": {
"Audio": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"rate": ; # int
"channels": ; # int
"format": ; # int
}
},
"Text": {
"data": "xxxxx", # base64 encoded data
"metadata": {
# TODO
}
},
"Image": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"width": ; # int
"height": ; # int
"format": ; # string
}
}
}
"META_INFO": {
"model_type": "", # string
"model_version": "", # string
"model_url": "", # string
"vits": {
"speaker_id": ; # int
}
}
# response body
{
RESPONSE_INFO: {
"status": "success/error", # string
"message": "xxxxx", # string
}
"DATA": {
"Audio": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"rate": ; # int
"channels": ; # int
"format": ; # int
}
},
"Text": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"is_end": True/False, # bool
}
}
"Image": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"width": ; # int
"height": ; # int
"format": ; # string
}
}
}
'''
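
As a rough illustration of the request layout documented above, the snippet below fills only the audio branch with placeholder values and serializes it; it is an illustration of the field layout, not the server's actual validation contract.
```
# Illustrative only: builds one request in the documented layout.
# The numeric metadata and the base64 payload are placeholders.
import json
import base64

fake_pcm = b"\x00\x00" * 1600  # 0.1 s of silence at 16 kHz, int16 mono

request_body = {
    "AUTH_INFO": {"user": "", "authid": "", "api_keys": "", "timestamp": ""},
    "DATA": {
        "Audio": {
            "data": base64.b64encode(fake_pcm).decode(),
            "metadata": {"rate": 16000, "channels": 1, "format": 8},  # 8 == pyaudio.paInt16
        },
    },
    "META_INFO": {"vits": {"speaker_id": 0}},
}
print(json.dumps(request_body)[:120], "...")
```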

takway/audio_utils.py (new file, 578 lines added)

@@ -0,0 +1,578 @@
import io
import os
import time
import pyaudio
import wave
import json
import warnings
import threading
import numpy as np
from collections import deque
from .common_utils import encode_bytes2str, decode_str2bytes
from takway.board import *
try:
import keyboard
except:
pass
def play_audio(audio_data, type='base64'):
'''
Read a base64-decoded audio byte stream and play it.
'''
# PyAudio configuration
p = pyaudio.PyAudio()
stream = p.open(format=pyaudio.paInt16, channels=1, rate=22050, output=True)
# play the audio
stream.write(audio_data)
stream.stop_stream()
stream.close()
p.terminate()
'''
import librosa
def reshape_sample_rate(audio, sr_original=None, sr_target=16000):
# get the original sample rate and audio data
if isinstance(audio, tuple):
sr_original, audio_data = audio
elif isinstance(audio, bytes):
audio_data = np.frombuffer(audio, dtype=np.int16)
assert sr_original is not None, f"sr_original should be provided if audio is a \
numpy.ndarray, but got sr_original `{sr_original}`."
if isinstance(audio_data, np.ndarray):
if audio_data.dtype == np.dtype('int16'):
audio_data = audio_data.astype(np.float32) / np.iinfo(np.int16).max
assert audio_data.dtype == np.dtype('float32'), f"audio_data should be float32, \
but got {audio_data.dtype}."
else:
raise TypeError(f"audio_data should be numpy.ndarray, but got {type(audio_data)}.")
# resample the audio data
audio_data_resampled = librosa.resample(audio_data, orig_sr=sr_original, target_sr=sr_target)
if audio_data_resampled.dtype == np.dtype('float32'):
audio_data_resampled = np.int16(audio_data_resampled * np.iinfo(np.int16).max)
# If the input was bytes, return the resampled data as bytes
if isinstance(audio, bytes):
audio_data_resampled = audio_data_resampled.tobytes()
return audio_data_resampled
# Example usage:
# If your audio data is in bytes:
# audio_bytes = b'...' # Your audio data as bytes
# audio_data_resampled = reshape_sample_rate(audio_bytes)
# If your audio data is in numpy int16:
# audio_int16 = np.array([...], dtype=np.int16) # Your audio data as numpy int16
# audio_data_resampled = reshape_sample_rate(audio_int16)
'''
# ####################################################### #
# base audio class
# ####################################################### #
class BaseAudio:
def __init__(self,
filename=None,
input=False,
output=False,
CHUNK=1024,
FORMAT=pyaudio.paInt16,
CHANNELS=1,
RATE=16000,
input_device_index=None,
output_device_index=None,
**kwargs):
self.CHUNK = CHUNK
self.FORMAT = FORMAT
self.CHANNELS = CHANNELS
self.RATE = RATE
self.filename = filename
assert input!= output, "input and output cannot be the same, \
but got input={} and output={}.".format(input, output)
print("------------------------------------------")
print(f"{'Input' if input else 'Output'} Audio Initialization: ")
print(f"CHUNK: {self.CHUNK} \nFORMAT: {self.FORMAT} \nCHANNELS: {self.CHANNELS} \nRATE: {self.RATE} \ninput_device_index: {input_device_index} \noutput_device_index: {output_device_index}")
print("------------------------------------------")
self.p = pyaudio.PyAudio()
self.stream = self.p.open(format=FORMAT,
channels=CHANNELS,
rate=RATE,
input=input,
output=output,
input_device_index=input_device_index,
output_device_index=output_device_index,
**kwargs)
def load_audio_file(self, wav_file):
with wave.open(wav_file, 'rb') as wf:
params = wf.getparams()
frames = wf.readframes(params.nframes)
print("Audio file loaded.")
# Audio Parameters
# print("Channels:", params.nchannels)
# print("Sample width:", params.sampwidth)
# print("Frame rate:", params.framerate)
# print("Number of frames:", params.nframes)
# print("Compression type:", params.comptype)
return frames
def check_audio_type(self, audio_data, return_type=None):
assert return_type in ['bytes', 'io', None], \
"return_type should be 'bytes', 'io' or None."
if isinstance(audio_data, str):
if len(audio_data) > 50:
audio_data = decode_str2bytes(audio_data)
else:
assert os.path.isfile(audio_data), \
"audio_data should be a file path or a bytes object."
wf = wave.open(audio_data, 'rb')
audio_data = wf.readframes(wf.getnframes())
elif isinstance(audio_data, np.ndarray):
if audio_data.dtype == np.dtype('float32'):
audio_data = np.int16(audio_data * np.iinfo(np.int16).max)
audio_data = audio_data.tobytes()
elif isinstance(audio_data, bytes):
pass
else:
raise TypeError(f"audio_data must be bytes, numpy.ndarray or str, \
but got {type(audio_data)}")
if return_type == None:
return audio_data
return self.write_wave(None, [audio_data], return_type)
def write_wave(self, filename, frames, return_type='io'):
"""Write audio data to a file."""
if isinstance(frames, bytes):
frames = [frames]
if not isinstance(frames, list):
raise TypeError("frames should be \
a list of bytes or a bytes object, \
but got {}.".format(type(frames)))
if return_type == 'io':
if filename is None:
filename = io.BytesIO()
if self.filename:
filename = self.filename
return self.write_wave_io(filename, frames)
elif return_type == 'bytes':
return self.write_wave_bytes(frames)
def write_wave_io(self, filename, frames):
"""
Write audio data to a file-like object.
Args:
filename: [string or file-like object], file path or file-like object to write
frames: list of bytes, audio data to write
"""
wf = wave.open(filename, 'wb')
# set the WAV file parameters
wf.setnchannels(self.CHANNELS)
wf.setsampwidth(self.p.get_sample_size(self.FORMAT))
wf.setframerate(self.RATE)
wf.writeframes(b''.join(frames))
wf.close()
if isinstance(filename, io.BytesIO):
filename.seek(0) # reset file pointer to beginning
return filename
def write_wave_bytes(self, frames):
"""Write audio data to a bytes object."""
return b''.join(frames)
# ####################################################### #
# play audio data from Speaker
# ####################################################### #
class AudioPlayer(BaseAudio):
def __init__(self,
RATE=22050,
**kwargs):
super().__init__(output=True, RATE=RATE, **kwargs)
def play(self, audio_data):
# print("Playing audio data...")
audio_data = self.check_audio_type(audio_data, return_type=None)
for i in range(0, len(audio_data), self.CHUNK):
self.stream.write(audio_data[i:i+self.CHUNK])
# print("Playing audio data...{}/{}".format(i, len(audio_data)))
self.stream.write(audio_data[i+self.CHUNK:])
# print("Audio data played.")
def close(self):
self.stream.stop_stream()
self.stream.close()
self.p.terminate()
# ####################################################### #
# record audio data from microphone
# ####################################################### #
class BaseRecorder(BaseAudio):
def __init__(self,
input=True,
base_chunk_size=None,
RATE=16000,
**kwargs):
super().__init__(input=input, RATE=RATE, **kwargs)
self.base_chunk_size = base_chunk_size
if base_chunk_size is None:
self.base_chunk_size = self.CHUNK
def record(self,
filename,
duration=5,
return_type='io',
logger=None):
if logger is not None:
logger.info("Recording started.")
else:
print("Recording started.")
frames = []
for i in range(0, int(self.RATE / self.CHUNK * duration)):
data = self.stream.read(self.CHUNK, exception_on_overflow=False)
frames.append(data)
if logger is not None:
logger.info("Recording stopped.")
else:
print("Recording stopped.")
return self.write_wave(filename, frames, return_type)
def record_chunk_voice(self,
return_type='bytes',
CHUNK=None,
exception_on_overflow=True,
queue=None):
data = self.stream.read(self.CHUNK if CHUNK is None else CHUNK,
exception_on_overflow=exception_on_overflow)
if return_type is not None:
return self.write_wave(None, [data], return_type)
return data
class HDRecorder(BaseRecorder):
def __init__(self,
board=None,
hd_trigger='keyboard',
keyboard_key='space',
voice_trigger=True,
hd_chunk_size=None,
hd_detect_threshold=50,
**kwargs):
super().__init__(**kwargs)
assert hd_trigger in ['keyboard', 'button']
self.hd_trigger = hd_trigger
self.voice_trigger = voice_trigger
self.hd_chunk_size = hd_chunk_size
if hd_chunk_size is None:
self.hd_chunk_size = self.base_chunk_size
if board == None:
assert hd_trigger == 'keyboard', "board should be `None` if hd_trigger is `keyboard`."
self.keyboard_key = keyboard_key
self.hardware = Keyboard(hd_trigger, keyboard_key, hd_detect_threshold)
else:
assert hd_trigger == 'button', f"hd_trigger should be `button` if board is `v329` or `orangepi`, but got `{hd_trigger}`."
if board == 'v329':
self.hardware = V329(hd_trigger, hd_detect_threshold)
elif board == 'orangepi':
self.hardware = OrangePi(hd_trigger, hd_detect_threshold)
print(f"Using {hd_trigger} as hardware trigger.")
def wait_for_hardware_pressed(self):
return self.hardware.wait_for_hardware_pressed()
@property
def is_hardware_pressed(self):
return self.hardware.is_hardware_pressed
def record_hardware(self, return_type='bytes'):
"""record audio when hardware trigger"""
print("Recording started for hardware trigger.")
frames = []
self.wait_for_hardware_pressed()
while True:
if self.hd_trigger == 'keyboard':
if keyboard.is_pressed(self.keyboard_key):
print("recording...")
data = self.record_chunk_voice(
CHUNK=self.CHUNK,
return_type=None,
exception_on_overflow=False)
frames.append(data)
else:
break
print("Recording stopped.")
elif self.hd_trigger == 'button':
if self.is_hardware_pressed: # get_button_status() is not defined on this class
data = self.stream.read(self.CHUNK)
frames.append(data)
else:
break
else:
recording = False
raise ValueError("hd_trigger should be 'keyboard' or 'button'.")
return self.write_wave(self.filename, frames, return_type)
'''
def record(self, return_type='bytes', queue=None):
if self.hd_trigger == 'all':
value_list = [] # records the history of the button value
if keyboard.is_pressed(self.keyboard_key):
audio_data = self.record_keyboard(return_type, queue)
elif self.button.get_value() == 0:
if self.get_button_status():
audio_data = self.record_button(return_type, queue)
else:
audio_data = self.record_voice(return_type, queue)
elif self.hd_trigger == 'keyboard':
print("Press SPACE to start recording.")
keyboard.wait("space")
audio_data = self.record_keyboard(return_type, queue)
elif self.hd_trigger == 'button':
print("Touch to start recording...")
if self.button.get_value() == 0:
if self.get_button_status():
audio_data = self.record_button(return_type, queue)
else:
audio_data = self.record_voice(return_type, queue)
return audio_data
def record_keyboard(self, return_type='bytes', queue=None):
"""record audio when keyboard pressing"""
print("Recording started.")
frames = []
recording = True
while recording:
if keyboard.is_pressed(self.keyboard_key):
data = self.stream.read(self.CHUNK)
frames.append(data)
else:
recording = False
print("Recording stopped.")
return self.write_wave(self.filename, frames, return_type)
def record_button(self, return_type='bytes', queue=None):
"""record audio when button pressing"""
print("Recording started.")
frames = []
recording = True
while recording:
value = self.button.get_value()
if value == 0:
data = self.stream.read(CHUNK)
frames.append(data)
else:
recording = False
print("Recording stopped.")
return self.write_wave(self.filename, frames, return_type)
'''
# ####################################################### #
# record audio data from microphone with VAD
# ####################################################### #
try:
import webrtcvad
webrtcvad_available = True
except:
warnings.warn("webrtcvad module not found, please install it if use `vad` hd_trigger.")
webrtcvad_available = False
class VADRecorder(HDRecorder):
def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1,**kwargs):
super().__init__(**kwargs)
if webrtcvad_available:
self.vad = webrtcvad.Vad(vad_sensitivity)
self.vad_buffer_size = vad_buffer_size
self.vad_chunk_size = int(self.RATE * frame_duration / 1000)
self.min_act_time = min_act_time # minimum active time, in seconds
self.is_currently_speaking = False
self.frames = []
def is_speech(self, data):
return self.vad.is_speech(data, self.RATE)
def vad_filter(self, data):
pass
def vad_record(self, return_type='io', CHUNK=None, queue=None, save_file=False):
"""录音并进行语音活动检测人声并返回分割后的音频数据"""
all_frames = []
buffer_size = self.vad_buffer_size
active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size)
audio_buffer = deque(maxlen=buffer_size)
silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size)
print("vad_recorded_audio VAD started. Press Ctrl+C to stop.")
try:
while True:
data = self.stream.read(self.vad_chunk_size)
all_frames.append(data)
print(f"VAD processing..., is_speech: {self.is_speech(data)}")
if self.is_speech(data):
# flag buffers
active_buffer.append(True); active_buffer.popleft()
silence_buffer.append(False); silence_buffer.popleft()
# stash the chunk into the buffer for now
audio_buffer.append(data)
# if the detection condition is met
if all(active_buffer):
if not self.is_currently_speaking:
print("Speech start detected")
self.is_currently_speaking = True
self.frames.extend(audio_buffer) # also include the buffered speech frames
if self.is_currently_speaking:
self.frames.append(data)
else:
# flag buffers
active_buffer.append(False); active_buffer.popleft()
silence_buffer.append(True); silence_buffer.popleft()
# speech has been detected; keep recording
if self.is_currently_speaking:
# end-of-speech flag
if all(silence_buffer):
print("Speech end detected")
break
except KeyboardInterrupt:
print("KeyboardInterrupt")
finally:
print("Stopping...")
if len(all_frames) > 0:
print(f"ALL frame: {len(all_frames)}")
print(f"ASR frame: {len(self.frames)}")
if save_file:
self.write_wave(f"output_{time.time()}_all.wav", all_frames)
self.write_wave(f"output_{time.time()}.wav", self.frames)
return self.write_wave(None, self.frames, return_type='bytes')
# ####################################################### #
# record audio data from microphone with PicoVoice hot words detection
# ####################################################### #
import struct
from datetime import datetime
import pvporcupine
class PicovoiceRecorder(VADRecorder):
def __init__(self,
access_key,
keywords=None,
keyword_paths=None,
model_path=None,
sensitivities=0.5,
library_path=None,
**kwargs):
super().__init__(**kwargs)
pico_cfg = dict(
access_key=access_key,
keywords=keywords,
keyword_paths=keyword_paths,
model_path=model_path,
sensitivities=sensitivities,
library_path=library_path,
)
self.pico_detector_init(pico_cfg)
self.keywords = self.pico_cfg['keywords']
print(f"PicovoiceRecorder initialized with keywords: {self.keywords}")
def pico_detector_init(self, pico_cfg):
if pico_cfg['keyword_paths'] is None:
if pico_cfg['keywords'] is None:
raise ValueError(f"Either `--keywords` or `--keyword_paths` must be set. \
Available keywords: {list(pvporcupine.KEYWORDS)}")
keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in pico_cfg['keywords']]
else:
keyword_paths = pico_cfg['keyword_paths']
if pico_cfg['sensitivities'] is None:
pico_cfg['sensitivities'] = [0.5] * len(keyword_paths)
elif isinstance(pico_cfg['sensitivities'], float):
pico_cfg['sensitivities'] = [pico_cfg['sensitivities']] * len(keyword_paths)
if len(keyword_paths) != len(pico_cfg['sensitivities']):
raise ValueError('Number of keywords does not match the number of sensitivities.')
try:
self.porcupine = pvporcupine.create(
access_key=pico_cfg['access_key'],
keywords=pico_cfg['keywords'],
keyword_paths=keyword_paths,
model_path=pico_cfg['model_path'],
sensitivities=pico_cfg['sensitivities'],
library_path=pico_cfg['library_path'])
except pvporcupine.PorcupineInvalidArgumentError as e:
print("One or more arguments provided to Porcupine is invalid: ", pico_cfg.keys())
print(e)
raise e
except pvporcupine.PorcupineActivationError as e:
print("AccessKey activation error")
raise e
except pvporcupine.PorcupineActivationLimitError as e:
print("AccessKey '%s' has reached it's temporary device limit" % pico_cfg['access_key'])
raise e
except pvporcupine.PorcupineActivationRefusedError as e:
print("AccessKey '%s' refused" % pico_cfg['access_key'])
raise e
except pvporcupine.PorcupineActivationThrottledError as e:
print("AccessKey '%s' has been throttled" % pico_cfg['access_key'])
raise e
except pvporcupine.PorcupineError as e:
print("Failed to initialize Porcupine")
raise e
self.pico_cfg = pico_cfg
def is_wakeup(self, data):
pcm = struct.unpack_from("h" * self.porcupine.frame_length, data)
result = self.porcupine.process(pcm)
# print(f"picovoice result: {result}")
if result >= 0:
print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result]))
return True
# self.write_wave(f"output_{time.time()}.wav", [data])
# print(f"write to: output_{time.time()}.wav")
return False
def record_picovoice(self, return_type=None, exception_on_overflow=False, queue=None):
print("Recording started. Press Ctrl+C to stop.")
while True:
data = self.record_chunk_voice(
return_type=None,
CHUNK=self.porcupine.frame_length,
exception_on_overflow=exception_on_overflow,
queue=queue)
wake_up = self.is_wakeup(data)
if wake_up:
break
return True
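
A possible end-to-end use of these classes (wake word, then VAD capture, then playback) is sketched below; the Picovoice access key, keyword file, and model file are placeholders, and the default `hd_trigger='keyboard'` path additionally needs the `keyboard` package and suitable permissions.
```
# Hedged sketch: wake-word + VAD capture + playback with the classes in this file.
# All Picovoice paths/keys below are placeholders, not values shipped with the repo.
from takway.audio_utils import PicovoiceRecorder, AudioPlayer

recorder = PicovoiceRecorder(
    access_key="YOUR_PICOVOICE_ACCESS_KEY",                 # placeholder
    keyword_paths=["可莉可莉_zh_raspberry-pi_v3_0_0.ppn"],   # placeholder keyword file
    model_path="porcupine_params_zh.pv",                    # placeholder Chinese model file
)

recorder.record_picovoice()                          # blocks until the wake word is heard
speech_bytes = recorder.vad_record(save_file=False)  # returns the detected utterance as bytes

player = AudioPlayer(RATE=16000)                     # playback sanity check of the capture
player.play(speech_bytes)
player.close()
```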

takway/board/__init__.py (new file, 4 lines added)

@@ -0,0 +1,4 @@
from .base_hd import BaseHardware
from .keyboard import Keyboard
from .sipeed import V329
from .orangepi import OrangePi

takway/board/base_hd.py (new file, 32 lines added)

@@ -0,0 +1,32 @@
import threading
import time
class BaseHardware:
def __init__(self, hd_trigger=None, hd_detect_threshold=50):
self.hd_trigger = hd_trigger
self.hd_detect_threshold = hd_detect_threshold
self.hd_lock = threading.Lock()
self.shared_hd_status = False
def init_hd_thread(self):
hd_thread = threading.Thread(target=self.hd_detection_loop)
hd_thread.start()
# hd_thread.join()
print("HD detection thread started.")
def hd_detection_loop(self):
pass
@property
def is_hardware_pressed(self):
return self.shared_hd_status
def wait_for_hardware_pressed(self):
print("Waiting for hardware trigger.")
while True:
if self.is_hardware_pressed:
time.sleep(0.01)
break
return True
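
New trigger back-ends follow the same pattern as the `Keyboard` and `OrangePi` classes later in this commit: subclass `BaseHardware`, start the detection thread, and keep `shared_hd_status` updated from `hd_detection_loop`. A bare-bones sketch, with a placeholder GPIO poll:
```
# Sketch of a custom trigger; read_my_gpio() is a placeholder for whatever
# input source the board actually exposes.
import time
from takway.board.base_hd import BaseHardware

class MyBoard(BaseHardware):
    def __init__(self, hd_trigger='button', hd_detect_threshold=50):
        super().__init__(hd_trigger, hd_detect_threshold)
        self.init_hd_thread()          # starts hd_detection_loop in a thread

    def hd_detection_loop(self):
        while True:
            pressed = read_my_gpio()   # placeholder input poll
            with self.hd_lock:
                self.shared_hd_status = pressed
            time.sleep(0.01)
```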

takway/board/keyboard.py (new file, 36 lines added)

@@ -0,0 +1,36 @@
import keyboard
import time
from takway.board.base_hd import BaseHardware
import datetime
#
class Keyboard(BaseHardware):
def __init__(self, hd_trigger='keyboard', keyboard_key='space', hd_detect_threshold=50):
super().__init__(hd_trigger, hd_detect_threshold)
self.keyboard_key = keyboard_key
self.init_hd_thread()
self.power_status = False # latched state toggled once per key press
def hd_detection_loop(self):
keyboard_status = False
last_status = False
while True:
'''
keyboard_status = keyboard.is_pressed(self.keyboard_key)
with self.hd_lock:
self.shared_hd_status = keyboard_status
'''
self.shared_hd_status = keyboard.is_pressed(self.keyboard_key)
time.sleep(0.001)
if not self.shared_hd_status and last_status:
self.power_status = not self.power_status
if self.power_status:
print("Chatting mode.")
else:
print("Silence mode.")
print(f"press time: {datetime.datetime.now()}")
last_status = self.shared_hd_status

takway/board/orangepi.py (new file, 94 lines added)

@@ -0,0 +1,94 @@
from takway.board.base_hd import BaseHardware
import threading
import datetime
try:
import wiringpi
from wiringpi import GPIO
except:
pass
'''
| GPIO | LED    |
| ---- | ------ |
| 0    | red    |
| 1    | yellow |
| 2    | green  |
| 3    | blue   |
| 4    | white  |

| GPIO | BUTTON   |
| ---- | -------- |
| 6    | button 1 |
| 8    | button 2 |
'''
class OrangePi(BaseHardware):
def __init__(self, hd_trigger='button', hd_detect_threshold=50):
super().__init__(hd_trigger, hd_detect_threshold)
self.LED_PIN_red = 0
self.LED_PIN_yellow = 1
self.LED_PIN_green = 2
self.LED_PIN_blue = 3
self.LED_PIN_white = 4
self.BUTTON_PIN_1 = 6
self.BUTTON_PIN_2 = 8
self.button_status_2 = False
self.led_set_status_2 = False
self.power_status = False # latched state toggled once per button press
self.button_init()
self.init_hd_thread()
def button_init(self):
wiringpi.wiringPiSetup()
# GPIO output mode
wiringpi.pinMode(self.LED_PIN_red,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_yellow,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_green,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_blue,GPIO.OUTPUT)
wiringpi.pinMode(self.LED_PIN_white,GPIO.OUTPUT)
# GPIO input mode
wiringpi.pinMode(self.BUTTON_PIN_1,GPIO.INPUT)
wiringpi.pinMode(self.BUTTON_PIN_2,GPIO.INPUT)
def init_hd_thread(self):
# hd_threads = [threading.Thread(target=self.hd_detection_loop),
# threading.Thread(target=self.hd_detection_loop_2)]
hd_threads = [threading.Thread(target=self.hd_detection_loop)]
for hd_thread in hd_threads:
hd_thread.start()
def hd_detection_loop(self):
keyboard_status = False
last_status = False
while True:
self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1)
if self.button_status:
wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW)
else:
wiringpi.digitalWrite(self.LED_PIN_red,GPIO.HIGH)
if not self.button_status and last_status:
self.power_status = not self.power_status
if self.power_status:
print("Chatting mode.")
else:
print("Silence mode.")
print(f"press time: {datetime.datetime.now()}")
last_status = self.button_status
def set_led_on(self, color='red'):
wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH)
def set_led_off(self, color='red'):
wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.LOW)
if __name__ == '__main__':
orangepi = OrangePi()
while True:
pass
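
The LED helpers map directly onto the pin table above. On an Orange Pi with `wiringpi` installed and GPIO access, a quick manual check might look like this (placeholder timing, not part of the shipped code):
```
# Quick LED/button check using the pin mapping defined in orangepi.py.
# Assumes wiringpi is installed and the process may access the GPIO pins.
import time
from takway.board import OrangePi

board = OrangePi()              # also starts the button-detection thread
for color in ('red', 'yellow', 'green', 'blue', 'white'):
    board.set_led_on(color)
    time.sleep(0.3)
    board.set_led_off(color)

print("chat mode enabled?", board.power_status)   # toggled by button 1
```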

takway/board/sipeed.py (new file, 58 lines added)

@@ -0,0 +1,58 @@
import sys
import warnings
import threading
import time
from collections import deque
from takway.board.base_hd import BaseHardware
if "gpiod" in sys.modules:
# sipeed MaixSense V329
import gpiod as gpio
else:
# if the gpiod library is not available, fall back to the default behaviour or raise
# raise ImportError("gpiod package is not available.")
pass
class V329(BaseHardware):
def __init__(self, hd_trigger='button', hd_detect_threshold=50):
super().__init__(hd_trigger, hd_detect_threshold)
self.button = self.button_init()
self.init_hd_thread()
def button_init(self):
PH_BASE = (8-1)*32 #PH
gpiochip1 = gpio.chip("gpiochip1")
button = gpiochip1.get_line((PH_BASE+5))
config = gpio.line_request()
config.request_type = gpio.line_request.DIRECTION_INPUT
config.flags = gpio.line_request.FLAG_BIAS_PULL_UP
button.request(config)
return button
@property
def button_status(self):
return True if self.button.get_value() == 1 else False
def hd_detection_loop(self):
self.shared_hd_status = False
button_value_list = deque(maxlen=self.hd_detect_threshold)
while True:
if len(button_value_list) > button_value_list.maxlen:
button_value_list.popleft()
button_value_list.append(self.button_status)
# keep the last 50 samples; if all of them are True, treat the button as pressed
if button_value_list.count(True) == button_value_list.maxlen:
with self.hd_lock:
self.shared_hd_status = True
# if all of the last 50 samples are False, treat the button as released
if button_value_list.count(False) == button_value_list.maxlen:
with self.hd_lock:
self.shared_hd_status = False

takway/cam_utils.py (new file, 11 lines added)

@@ -0,0 +1,11 @@
import warnings
try:
import cv2
except ImportError:
warnings.warn("OpenCV is not installed, please check the module if you need.")
class Camera:
def __init__(self,
device='pc',
width=1280,
height=720):
pass

takway/clients/__init__.py (new empty file)

takway/clients/client_utils.py (new file, 160 lines added)

@@ -0,0 +1,160 @@
import os
import json
import time
import datetime
import requests
from takway.common_utils import encode_bytes2str, decode_str2bytes
'''
{
"RESPONSE_INFO": {
"status": "success/error", # string
"message": "xxxxx", # string
},
"DATA": {
"Audio": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"rate": ; # int
"channels": ; # int
"format": ; # int
}
},
"Text": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"is_end": True/False, # bool
}
},
"Image": {
"data": "xxxxx", # base64 encoded data
"metadata": {
"width": ; # int
"height": ; # int
"format": ; # string
}
}
}
}
'''
class Client:
def __init__(self, server_url):
self.server_url = server_url
def gen_request_data(self, **kwargs):
# print("kwargs:", kwargs)
audio_data = kwargs.get("audio_data", None)
text_data = kwargs.get("text_data", dict())
return json.dumps(
{
"is_end": audio_data.get("is_end"), # bool
"is_bgn": audio_data.get("is_bgn"), # bool
"DATA": {
"Audio": {
"data": encode_bytes2str(audio_data['frames']), # base64 encoded data
"metadata": {
"frames_size": audio_data.get("frames_size"), # string
"chunk_size": audio_data.get("chunk_size"), # int
"is_end": audio_data.get("is_end"), # bool
}
},
"Text": {
"data": text_data.get("text"), # base64 encoded data
"metadata": {
"chat_status": text_data.get("chat_status"), # string
"chat_history": text_data.get("chat_history"), # list of dict
}
},
},
"META_INFO": {
# "model_version": kwargs.get("model_version", ""), # string
# "model_url": kwargs.get("model_url", ""), # string
"character": {
"name": kwargs.get("character", "Klee"), # string
"speaker_id": kwargs.get("speaker_id", 113), # int
"wakeup_words": kwargs.get("wakeup_words", "可莉来啦"), # list of string
}
}
}
) + '\n'
def send_data_to_server(self, **kwargs):
return requests.post(self.server_url,
data=self.gen_request_data(**kwargs), stream=True)
# ############################################ #
# ############ WebSocket Client ############# #
def check_audio_type(data, return_type='base64'):
'''
Check if the data type is valid.
'''
assert return_type in ['bytes', 'base64']
if return_type == 'base64':
if isinstance(data, bytes):
return encode_bytes2str(data)
elif return_type == 'bytes':
if isinstance(data, str):
return decode_str2bytes(data)
else:
raise ValueError('Invalid data type: {}.'.format(type(data)))
import websocket
from websocket import create_connection
class BaseWebSocketClient:
def __init__(self, server_url, session_id):
self.server_url = server_url
self.session_id = session_id
def wakeup_client(self):
'''
Start the client.
'''
self.websocket = create_connection(self.server_url)
def send_per_data(self,
text: str = '',
audio: bytes = b'',
stream: bool = True,
voice_synthesize: bool = False,
is_end: bool = False,
encoding: str = 'base64',
):
'''
Send data to server.
Args:
data: bytes, data to be sent to server.
'''
self.websocket.send(json.dumps({
"text": text,
"audio": check_audio_type(audio, return_type=encoding),
"meta_info": {
"session_id": self.session_id,
"stream": stream,
"voice_synthesize": voice_synthesize,
"is_end": is_end,
"encoding": encoding,
}}))
def receive_per_data(self):
try:
recv_data = self.websocket.recv()
except websocket._exceptions.WebSocketConnectionClosedException:
return None, None
try:
recv_data = json.loads(recv_data)
except json.JSONDecodeError as e:
# print(f"JSONDecodeError: {e}")
# is_end = True
pass
except Exception as e:
# print(f"receive_per_data error: {e}")
assert isinstance(recv_data, bytes), ValueError(f"Received data is not bytes, got {type(recv_data)}.")
return recv_data, type(recv_data)
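
Putting `BaseWebSocketClient` together, a minimal send/receive round trip might look like the sketch below; the server URL and session id are placeholders, and the loop stops when `receive_per_data()` reports a closed connection, which is how `web_socket_client_utils.py` consumes it.
```
# Hedged usage sketch for BaseWebSocketClient; URL and session_id are placeholders.
from takway.clients.client_utils import BaseWebSocketClient

client = BaseWebSocketClient("ws://127.0.0.1:8000/chat", session_id="demo")  # placeholders
client.wakeup_client()                      # opens the WebSocket connection

with open("examples/example_recording.wav", "rb") as f:
    client.send_per_data(audio=f.read(), stream=True,
                         voice_synthesize=True, is_end=True, encoding='base64')

while True:
    data, data_type = client.receive_per_data()
    if data_type is None:                   # connection closed by the server
        break
    if data_type == bytes:
        print(f"got {len(data)} bytes of synthesized audio")
    else:
        print("got message:", data)
```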

takway/clients/local_client_utils.py (new file, 513 lines added)

@@ -0,0 +1,513 @@
# basic
import io
import time
import json
import random
from collections import deque
# log
import logging
import warnings
# multiprocessing
import queue
import threading
import multiprocessing
# web request
import requests
import pyaudio
# hot words detection
import pvporcupine
from takway.apps.data_struct import QueueIterator
from takway.common_utils import *
from takway.audio_utils import PicovoiceRecorder
from takway.clients.client_utils import CharacterClient
from takway.audio_utils import AudioPlayer
from takway.emo_utils import EmoVideoPlayer
class LocalClinet:
def __init__(self,
server_args,
recorder_args,
video_args,
emo_args,
log_args):
# server_args
self.server_args = server_args
# recorder_args
self.recorder_args = recorder_args
# video_args
self.video_args = video_args
# emo_args
self.emo_args = emo_args
# log_args
self.log_args = log_args
# TODO: design a multiprocessing-aware log queue
self.logger_init()
def logger_init(self):
# log_args
log_level = self.log_args['log_level']
log_file = self.log_args['log_file']
if log_level == 'debug':
log_level = logging.DEBUG
elif log_level == 'info':
log_level = logging.INFO
# logger
self.logger = logging.getLogger('mylogger')
self.logger.setLevel(log_level)
# file handler: writes log records to the log file
handler = logging.FileHandler(log_file)
handler.setLevel(log_level)
# stream handler: writes log records to the console
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# formatter: defines the output format of both handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
console.setFormatter(formatter)
# attach the handlers
self.logger.addHandler(handler)
self.logger.addHandler(console)
self.logger.info("Logger started.")
def process_init(self):
# multiprocessing
manager = multiprocessing.Manager()
self.trigger_queue = manager.Queue()
self.client_queue = manager.Queue()
self.audio_play_queue = manager.Queue()
self.emo_display_queue = manager.Queue()
self.share_time_dict = manager.dict()
self.speaking_emo_event = manager.Event()
processes = [
multiprocessing.Process(target=self.audio_process,
args=(self.trigger_queue,self.client_queue)),
# multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue,self.client_queue)),
multiprocessing.Process(target=self.local_client_process,
args=(self.client_queue,self.audio_play_queue,self.emo_display_queue, self.share_time_dict)),
multiprocessing.Process(target=self.audio_play_process,
args=(self.audio_play_queue,self.share_time_dict)),
]
if self.emo_args.pop('enable'):
processes.append(
multiprocessing.Process(target=self.emo_display_process, args=(self.emo_display_queue,)),
)
for process in processes:
process.start()
for process in processes:
process.join()
def audio_process(self,
trigger_queue,
client_queue):
"""audio_process
Args:
trigger_queue: multiprocessing.Queue, trigger queue
client_queue: multiprocessing.Queue, client queue
"""
self.frame_chunk_size = self.recorder_args.pop('frame_chunk_size')
self.min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
voice_trigger = self.recorder_args.pop('voice_trigger')
self.RATE = self.recorder_args['RATE']
recorder = PicovoiceRecorder(**self.recorder_args)
# shared data struct:
self.shared_waiting = False
self.shared_lock = threading.Lock()
self.shared_data_lock = threading.Lock()
# create threads
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
if voice_trigger:
voice_threads = [
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
]
threads.extend(voice_threads)
for thread in threads:
thread.start()
self.logger.info("Audio Process started.")
while True:
for thread in threads:
thread.join()
print(f"audio process exit") ; exit()
def hardware_trigger_thread(self, recorder):
"""hardware_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
self.logger.info("Hardware trigger thread started.")
trgrigger_status = False
while True:
if self.shared_waiting:
continue
# init status buffer
is_bgn = True
_frames = 0
_total_frames = 0
frames = []
full_frames = []
print("Waiting for button press...")
recorder.wait_for_hardware_pressed()
print("Button pressed.")
# stop voice trigger thread
with self.shared_data_lock:
self.shared_waiting = True # shared_waiting gates all threads: True = stand-by, False = working
print("Start recording...")
bg_t = time.time()
record_chunk_size = recorder.hd_chunk_size
while True:
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
frames.append(data)
full_frames.append(data)
_total_frames += 1
if not recorder.is_hardware_pressed:
break
stream_reset_status = self.stream_record(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=False)
if stream_reset_status:
full_frames.clear()
is_bgn = False
self.stream_record(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=True)
print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.")
# recorder.write_wave_io(f"record_{int(bg_t)}.wav", frames); print(f"write record_{int(bg_t)}.wav")
with self.shared_data_lock:
self.shared_waiting = False # let the voice-trigger thread resume
def voice_trigger_thread(self, recorder):
"""voice_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
self.logger.info("voice record thread started.")
print("Waiting for wake up...")
while True:
if self.shared_waiting:
continue
data = recorder.record_chunk_voice(
CHUNK=recorder.porcupine.frame_length,
return_type=None,
exception_on_overflow=False,
queue=None)
record_chunk_size = recorder.vad_chunk_size
if not recorder.is_wakeup(data):
continue
# wake up
is_bgn = True
_frames = 0
_total_frames = 0
frames = []
full_frames = []
# status buffer
buffer_size = recorder.vad_buffer_size
active_buffer = deque(maxlen=buffer_size)
bg_t = time.time()
print("Start recording...")
while True:
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
if data is None:
continue
is_speech = recorder.is_speech(data)
if is_speech:
_frames += 1
frames.append(data)
print("add vad frame")
_total_frames += 1
full_frames.append(data)
# send chunk data to client
stream_reset_status = self.stream_record(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=False)
if stream_reset_status:
full_frames.clear()
is_bgn = False
if is_speech:
if active_buffer.__len__() == buffer_size:
active_buffer.popleft()
active_buffer.append(True)
else:
if active_buffer.__len__() == buffer_size:
active_buffer.popleft()
active_buffer.append(False)
if active_buffer.count(False) != active_buffer.maxlen:
continue
if time.time() - bg_t > recorder.min_act_time:
# end recording
self.stream_record(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=True)
print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid rate: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
print("End recording.")
break
def stream_record(self,
bytes_frames: bytes,
frames_size: int,
record_chunk_size: int,
is_bgn: bool,
is_end: bool):
'''
Args:
bytes_frames: bytes, audio data
frames_size: int, audio data size
record_chunk_size: int, audio data chunk size
is_bgn: bool, is begin of stream
is_end: bool, is end of stream
Returns:
bool, if stream reset status
'''
if len(bytes_frames) == 0:
return False
if frames_size*record_chunk_size >= self.min_stream_record_time*self.RATE or is_end:
if is_bgn and is_end:
return False
stream_data = dict(
frames=bytes_frames,
frames_size=frames_size,
chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=is_end)
self.client_queue.put(('audio', stream_data))
if is_end:
print("put None to client queue.")
self.client_queue.put(None)
return True
else:
return False
def camera_process(self, logger, trigger_queue, client_queue):
from takway.cam_utils import Camera
cam = Camera(self.video_args)
while True:
if trigger_queue.empty():
time.sleep(0.5)
else:
item = trigger_queue.get()
if item[0] == 'trgrigger_status' and item[1]:
_, frame = cam.read()
client_queue.put(('image', frame))
def local_client_process(self,
client_queue,
audio_play_queue,
emo_display_queue,
share_time_dict):
'''
Args:
client_queue: multiprocessing.Queue, client queue
audio_play_queue: multiprocessing.Queue, audio play queue
emo_display_queue: multiprocessing.Queue, emo display queue
share_time_dict: multiprocessing.Manager.dict, shared time dict
'''
character = self.server_args.pop('character')
client = CharacterClient(**self.server_args)
# print(f"-------------------{character}-------------------")
# print(f"client.chat_status: {client.chat_status}")
if client.chat_status == 'init':
client.set_character(character)
client.chat_status = 'chating'
# print(f"client.chat_history: {client.chat_history}")
self.logger.info("Local client process started.")
while True:
response = None
if self.client_queue.empty():
continue
try:
response = requests.post(client.server_url, stream=True,
data=self.generate_stream_queue_data(client, client_queue))
print("get response from server.")
self.get_stream_response(client, response, audio_play_queue, emo_display_queue)
except (requests.exceptions.ConnectionError, ConnectionError) as e:
print(f"Wait for Server connection...")
except requests.exceptions.Timeout as e:
print(f"Timeout: {e}")
except requests.exceptions.ChunkedEncodingError:
print("ChunkedEncodingError")
def generate_stream_queue_data(self, client, client_queue, **kwargs):
_i = 0
for queue_data in QueueIterator(client_queue):
if queue_data[0] == 'audio':
_i += 1
if _i == 1:
self.share_time_dict['client_time'] = [time.time()]
else:
self.share_time_dict['client_time'].append(time.time())
audio_data = queue_data[1]
print("send audio data to server...")
# print(f"local chat history: {client.chat_history}")
yield client.gen_request_data(
audio_data=audio_data,
chat_data=dict(
chat_history=client.chat_history,
chat_status=client.chat_status),
character_data=client.character_info)
def get_stream_response(self,
client,
response,
audio_play_queue=None,
emo_display_queue=None,
chunk_size=1024):
'''
Args:
client: takway.client_utils.CharacterClient, client object
response: requests.Response, response object
audio_play_queue: multiprocessing.Queue, audio play queue
emo_display_queue: multiprocessing.Queue, emo display queue
chunk_size: int, chunk size
'''
assert isinstance(response, requests.Response), \
f"response is not requests.Response, but {type(response)}"
temp_data = '' # init temp_data
if response.status_code == 200:
print("get response from server successfully.")
else:
print(f"response error, status code: {response.status_code}")
chat_llm_response = ''
_i = 0
# for chunk in response.iter_lines():
# if chunk:
for chunk in response.iter_content(chunk_size=chunk_size):
temp_data += chunk.decode('utf-8')
if temp_data.endswith('\n'):
_i += 1
try:
temp_json = json.loads(temp_data.rstrip('\n'))
# phase 1: get audio data
audio_play_queue.put(('server_data', temp_json['audio_output']['tts_stream_data']))
# phase 2: get chat data
chat_llm_response += temp_json['chat_output']['llm_stream_data']
if temp_json['is_end']:
client.update_chat_history(question=temp_json['chat_output']['question'],
response=chat_llm_response, asw_prompt_id=1)
# print(f"chat_history: {client.chat_history}")
if _i == 1:
emo_display_queue.put(('emo_data', '高兴'))
except json.JSONDecodeError:
print(f"json decode error: {temp_data}")
temp_data = ''
# print("get response.")
print("End get response.")
def audio_play_process(self,
audio_play_queue,
share_time_dict):
'''
Args:
audio_play_queue: multiprocessing.Queue, audio play queue
share_time_dict: multiprocessing.Manager.dict, shared time dict
'''
audio_player = AudioPlayer()
self.logger.info("Audio play process started.")
while True:
self.speaking_emo_event.clear()
item = audio_play_queue.get()
self.speaking_emo_event.set() # stop emo random display
if item[0] == 'server_data':
# play the audio
print("Playing audio...")
tts_audio = item[1]
print(f"wait time: {(time.time() - self.share_time_dict['client_time'][0])*1000:.2f} ms")
try:
audio_player.play(tts_audio)
except TypeError as e:
# print(f"audio play error: {e}")
# print(f"tts_audio: {tts_audio}")
# print(f"type tts_audio: {type(tts_audio)}")
# tts_audio: <class 'NoneType'>
continue
def emo_display_process(self, emo_display_queue):
'''
Args:
emo_display_queue: multiprocessing.Queue, emo display queue
'''
emo_player = EmoVideoPlayer(**self.emo_args)
self.logger.info("Emo display process started.")
while True:
if emo_display_queue.empty():
time.sleep(0.1)
if self.speaking_emo_event.is_set():
continue
emo_player.random_wink()
else:
item = emo_display_queue.get()
print(f"Emo display process Get item: {item[0]}")
if item[0] == 'emo_data':
server_data = item[1]
print("Displaying emo...")
emo_player.display_emo(emo_name='兴奋', stage='start')
emo_player.display_emo(emo_name='兴奋', stage='loop')
emo_player.display_emo(emo_name='兴奋', stage='end')
print("Display done.")
time.sleep(15)

takway/clients/web_socket_client_utils.py (new file, 330 lines added)

@@ -0,0 +1,330 @@
# basic
import io
import os
import sys
import time
import json
import random
from collections import deque
from datetime import datetime
# log
import logging
import warnings
# multiprocessing
import queue
import threading
import multiprocessing
# web request
import requests
import pyaudio
# hot words detection
import pvporcupine
from takway.apps.data_struct import QueueIterator
from takway.common_utils import *
from takway.audio_utils import PicovoiceRecorder, HDRecorder
from takway.clients.client_utils import BaseWebSocketClient
from takway.audio_utils import AudioPlayer
class WebSocketClinet:
def __init__(self,
board,
server_args,
recorder_args,
player_args,
log_args,
excute_args=None,
):
self.board = board
# server_args
self.server_args = server_args
# recorder_args
self.recorder_args = recorder_args
# player_args
self.player_args = player_args
# excute_args
self.excute_args = excute_args
# log_args
self.log_args = log_args
def process_init(self):
# multiprocessing
manager = multiprocessing.Manager()
self.trigger_queue = manager.Queue()
self.client_queue = manager.Queue()
self.audio_play_queue = manager.Queue()
self.excute_queue = manager.Queue()
# multiprocessing event flags
self.mircophone_active_set = manager.Event()
self.speaker_active_set = manager.Event()
processes = [
multiprocessing.Process(target=self.audio_process),
multiprocessing.Process(target=self.web_socket_client_process),
multiprocessing.Process(target=self.audio_play_process),
]
if self.excute_args.get('enable', False):
processes.append(
multiprocessing.Process(target=self.excute_process),
)
for process in processes:
time.sleep(0.5)
process.start()
for process in processes:
process.join()
def audio_process(self):
"""audio_process
Args:
trigger_queue: multiprocessing.Queue, trigger queue
client_queue: multiprocessing.Queue, client queue
"""
min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
voice_trigger = self.recorder_args.pop('voice_trigger')
# TODO:
press_type = self.recorder_args.pop('press_type')
max_slience_time = self.recorder_args.pop('max_slience_time')
if voice_trigger:
recorder = PicovoiceRecorder(**self.recorder_args)
else:
voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path']
for key in voice_keys:
self.recorder_args.pop(key)
recorder = HDRecorder(**self.recorder_args)
recorder.min_stream_record_time = min_stream_record_time
# TODO:
recorder.press_type = press_type
recorder.max_slience_time = max_slience_time
print("Audio Process started.")
print("Waiting for wake up...")
# recorder.hardware.set_led_on("green")
self.shared_waiting = False # local idle flag; never toggled in this client
while True:
if self.shared_waiting:
continue
data = recorder.record_chunk_voice(
CHUNK=recorder.porcupine.frame_length,
return_type=None,
exception_on_overflow=False,
queue=None)
record_chunk_size = recorder.vad_chunk_size
# woken up either by the power button being pressed or by the keyword detector
if recorder.hardware.power_status or recorder.is_wakeup(data):
# recorder.hardware.set_led_on("blue")
pass
else:
continue
# wake up
is_bgn = True
is_end = False
frames = []
# status buffer
slience_bgn_t = time.time()
slience_time = 0
print("Start recording...")
# enter the conversation state
while True:
# voice activity detection
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
is_speech = recorder.is_speech(data)
# evaluate the current state
if is_speech:
print("valid voice")
slience_bgn_t = time.time()
frames.append(data)
else:
slience_time = time.time() - slience_bgn_t
# long silence closes the wake state: if nothing is said for too long after wake-up, or the power button is switched off, treat the conversation as finished
if slience_time > recorder.max_slience_time or not recorder.hardware.power_status:
break
# short silence ends a single utterance: once silence exceeds roughly 0.5 s, send the buffered data
if slience_time > recorder.min_act_time:
is_end = True
is_bgn = False
if not is_speech:
continue
# send the data as a stream
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=is_end)
if stream_reset_status:
frames.clear()
is_bgn = False
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
# print("End recording.")
# recorder.hardware.set_led_off("blue")
def stream_record_process(self,
bytes_frames: bytes,
frames_size: int,
record_chunk_size: int,
sample_rate: int,
min_stream_record_time: int,
is_bgn: bool,
is_end: bool):
'''
Args:
bytes_frames: bytes, audio data
frames_size: int, audio data size
record_chunk_size: int, audio data chunk size
is_bgn: bool, is begin of stream
is_end: bool, is end of stream
Returns:
bool, if stream reset status
'''
if len(bytes_frames) == 0:
return False
if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end:
if is_bgn and is_end:
return False
stream_data = dict(
frames=bytes_frames,
frames_size=frames_size,
chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=is_end)
self.client_queue.put(('audio', stream_data))
if is_end:
# print("put None to client queue.")
self.client_queue.put(None)
return True
else:
return False
def web_socket_client_process(self):
client = BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id'])
print("Web socket client process started.")
# print("Web socket client process started.")
while True:
if self.client_queue.empty():
continue
# print(f"init skt time: {datetime.now()}")
# wake up the server-side session
client.wakeup_client()
# send the audio data
for queue_data in QueueIterator(self.client_queue):
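# QueueIterator is expected to keep yielding items from client_queue until it reaches the
# None sentinel that stream_record_process enqueues at the end of an utterance.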
if queue_data[0] == 'audio':
audio_dict = queue_data[1]
client.send_per_data(
audio=audio_dict['frames'],
stream=True,
voice_synthesize=True,
is_end=audio_dict['is_end'],
encoding='base64',
)
# print(f"send skt time: {datetime.now()}")
# print(f"fnsh skt time: {datetime.now()}")
# receive data
while True:
response, data_type = client.receive_per_data()
if data_type == dict:
print(response) # print the received message
'''
try:
response = json.loads(response['msg'])
if 'content' in response.keys():
self.excute_queue.put((response['instruct'], response['content']))
except json.JSONDecodeError as e:
print(f"json decode error: {e}")
continue
# print(f"recv json time: {datetime.now()}")
'''
elif data_type == bytes:
# print(f"recv bytes time: {datetime.now()}")
self.audio_play_queue.put(('audio_bytes', response))
elif data_type is None:
break # no more messages, exit the receive loop
# print("receive finished:", datetime.now())
def audio_play_process(self):
'''
Args:
audio_play_queue: multiprocessing.Queue, audio play queue
share_time_dict: multiprocessing.Manager.dict, shared time dict
'''
audio_player = AudioPlayer(**self.player_args)
print("Audio play process started.")
while True:
item = self.audio_play_queue.get()
# play audio
print("Playing audio...")
tts_audio = item[1]
print(f"tts_audio len: {len(tts_audio)}")
print(f"play audio time: {datetime.now()}")
try:
# playback
self.speaker_active_set.set()
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
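# write the audio in CHUNK-sized slices so playback stays interruptible: as soon as the
# microphone becomes active again the loop breaks and playback is cut off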
for i in range(0, len(tts_audio), audio_player.CHUNK):
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
# print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
if self.mircophone_active_set.is_set():
print("mirophone is active.")
self.mircophone_active_set.wait()
break
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
print(f"audio data played.")
except TypeError as e:
print(f"audio play error: {e}")
continue
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
# print(f"{item[0]} data played.")
def excute_process(self):
'''
Args:
excute_queue: multiprocessing.Queue, excute display queue
'''
print("Excute process started.")
while True:
if self.excute_queue.empty():
continue
if self.speaker_active_set.is_set():
instruct, content = self.excute_queue.get()
print(f"Got speaker info: {instruct, content}")
print(f"Playing {instruct} {content}...")
print(f"play {instruct} time: {datetime.now()}")
self.audio_play_queue.put((instruct, content))
self.speaker_active_set.clear()

View File

@ -0,0 +1,464 @@
# basic
import io
import os
import sys
import time
import json
import random
from collections import deque
from datetime import datetime
# log
import logging
import warnings
# multiprocessing
import queue
import threading
import multiprocessing
# web request
import requests
import pyaudio
# hot words detection
import pvporcupine
from takway.apps.data_struct import QueueIterator
from takway.common_utils import *
from takway.audio_utils import PicovoiceRecorder, HDRecorder
from takway.clients.client_utils import BaseWebSocketClient
from takway.audio_utils import AudioPlayer
class WebSocketClinet:
def __init__(self,
board,
server_args,
recorder_args,
player_args,
log_args,
excute_args=None,
):
self.board = board
# server_args
self.server_args = server_args
# recorder_args
self.recorder_args = recorder_args
# player_args
self.player_args = player_args
# excute_args
self.excute_args = excute_args
# log_args
self.log_args = log_args
def process_init(self):
# multiprocessing
manager = multiprocessing.Manager()
self.trigger_queue = manager.Queue()
self.client_queue = manager.Queue()
self.audio_play_queue = manager.Queue()
self.excute_queue = manager.Queue()
# multiprocessing event flags
self.mircophone_active_set = manager.Event()
self.speaker_active_set = manager.Event()
processes = [
multiprocessing.Process(target=self.audio_process),
multiprocessing.Process(target=self.web_socket_client_process),
multiprocessing.Process(target=self.audio_play_process),
]
if self.excute_args.get('enable', False):
processes.append(
multiprocessing.Process(target=self.excute_process),
)
for process in processes:
time.sleep(0.5)
process.start()
for process in processes:
process.join()
def audio_process(self):
"""audio_process
Args:
trigger_queue: multiprocessing.Queue, trigger queue
client_queue: multiprocessing.Queue, client queue
"""
min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
voice_trigger = self.recorder_args.pop('voice_trigger')
press_type = self.recorder_args.pop('press_type')
if voice_trigger:
recorder = PicovoiceRecorder(**self.recorder_args)
else:
voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path']
for key in voice_keys:
self.recorder_args.pop(key)
recorder = HDRecorder(**self.recorder_args)
recorder.min_stream_record_time = min_stream_record_time
recorder.press_type = press_type
# shared data struct:
self.shared_waiting = False
self.shared_lock = threading.Lock()
self.shared_data_lock = threading.Lock()
# create threads
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
if voice_trigger:
voice_threads = [
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
]
threads.extend(voice_threads)
for thread in threads:
thread.start()
print("Audio Process started.")
while True:
for thread in threads:
thread.join()
print(f"audio process exit") ; exit()
def hardware_trigger_thread(self, recorder):
"""hardware_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
print("Hardware trigger thread started.")
trgrigger_status = False
record_chunk_size = recorder.hd_chunk_size
while True:
if self.shared_waiting:
continue
# init status buffer
is_bgn = True
frames = []
_total_frames = 0
self.mircophone_active_set.clear()
print("Waiting for button press...")
recorder.wait_for_hardware_pressed()
print("Button pressed.")
self.mircophone_active_set.set()
# stop voice trigger thread
with self.shared_data_lock:
self.shared_waiting = True # shared_waiting controls the standby state of all threads: True = standby, False = working
print("Start recording...")
bg_t = time.time()
while True:
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
frames.append(data)
_total_frames += 1
if not recorder.is_hardware_pressed:
# print("Button released.")
print(f"button rlse time: {datetime.now()}")
break
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=False)
if stream_reset_status:
frames.clear()
is_bgn = False
self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(frames),
frames_size=len(frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=True)
# print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.")
# print(f"rcrd time: {datetime.now()}")
with self.shared_data_lock:
self.shared_waiting = False # let the voice trigger thread resume
def voice_trigger_thread(self, recorder):
"""voice_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
print("voice record thread started.")
print("Waiting for wake up...")
while True:
if self.shared_waiting:
continue
data = recorder.record_chunk_voice(
CHUNK=recorder.porcupine.frame_length,
return_type=None,
exception_on_overflow=False,
queue=None)
record_chunk_size = recorder.vad_chunk_size
self.mircophone_active_set.clear()
if not recorder.is_wakeup(data):
continue
if self.board == 'orangepi':
recorder.hardware.set_led2_on()
self.mircophone_active_set.set()
# wake up
is_bgn = True
_frames = 0
_total_frames = 0
frames = []
full_frames = []
# status buffer
buffer_size = recorder.vad_buffer_size
active_buffer = deque(maxlen=buffer_size)
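# active_buffer keeps the most recent buffer_size VAD decisions; the utterance is only closed
# once every entry is False (sustained silence) and at least min_act_time has passed since wake-up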
bg_t = time.time()
print("Start recording...")
while True:
data = recorder.record_chunk_voice(
CHUNK=record_chunk_size,
return_type=None,
exception_on_overflow=False)
if data is None:
continue
is_speech = recorder.is_speech(data)
if is_speech:
_frames += 1
frames.append(data)
# print("add vad frame")
_total_frames += 1
full_frames.append(data)
# send chunk data to client
stream_reset_status = self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=False)
if stream_reset_status:
full_frames.clear()
is_bgn = False
if is_speech:
if len(active_buffer) == buffer_size:
active_buffer.popleft()
active_buffer.append(True)
else:
if len(active_buffer) == buffer_size:
active_buffer.popleft()
active_buffer.append(False)
if active_buffer.count(False) != active_buffer.maxlen:
continue
if time.time() - bg_t > recorder.min_act_time:
# end recording
self.stream_record_process(
bytes_frames=recorder.write_wave_bytes(full_frames),
frames_size=len(full_frames),
record_chunk_size=record_chunk_size,
sample_rate=recorder.RATE,
min_stream_record_time=recorder.min_stream_record_time,
is_bgn=is_bgn,
is_end=True)
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
# print("End recording.")
break
if self.board == 'orangepi':
recorder.hardware.set_led2_off()
def stream_record_process(self,
bytes_frames: bytes,
frames_size: int,
record_chunk_size: int,
sample_rate: int,
min_stream_record_time: int,
is_bgn: bool,
is_end: bool):
'''
Args:
bytes_frames: bytes, audio data
frames_size: int, audio data size
record_chunk_size: int, audio data chunk size
is_bgn: bool, is begin of stream
is_end: bool, is end of stream
Returns:
bool, if stream reset status
'''
if len(bytes_frames) == 0:
return False
if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end:
if is_bgn and is_end:
return False
stream_data = dict(
frames=bytes_frames,
frames_size=frames_size,
chunk_size=record_chunk_size,
is_bgn=is_bgn,
is_end=is_end)
self.client_queue.put(('audio', stream_data))
if is_end:
# print("put None to client queue.")
self.client_queue.put(None)
return True
else:
return False
def web_socket_client_process(self):
client = BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id'])
print("Web socket client process started.")
# print("Web socket client process started.")
while True:
if self.client_queue.empty():
continue
# print(f"init skt time: {datetime.now()}")
# wake up the server-side session
client.wakeup_client()
# send the audio data
for queue_data in QueueIterator(self.client_queue):
if queue_data[0] == 'audio':
audio_dict = queue_data[1]
client.send_per_data(
audio=audio_dict['frames'],
stream=True,
voice_synthesize=True,
is_end=audio_dict['is_end'],
encoding='base64',
)
# print(f"send skt time: {datetime.now()}")
# print(f"fnsh skt time: {datetime.now()}")
# receive data
while True:
response, data_type = client.receive_per_data()
if data_type == dict:
print(response) # print the received message
'''
try:
response = json.loads(response['msg'])
if 'content' in response.keys():
self.excute_queue.put((response['instruct'], response['content']))
except json.JSONDecodeError as e:
print(f"json decode error: {e}")
continue
# print(f"recv json time: {datetime.now()}")
'''
elif data_type == bytes:
# print(f"recv bytes time: {datetime.now()}")
self.audio_play_queue.put(('audio_bytes', response))
elif data_type is None:
break # no more messages, exit the receive loop
# print("receive finished:", datetime.now())
def audio_play_process(self):
'''
Args:
audio_play_queue: multiprocessing.Queue, audio play queue
share_time_dict: multiprocessing.Manager.dict, shared time dict
'''
audio_player = AudioPlayer(**self.player_args)
print("Audio play process started.")
while True:
item = self.audio_play_queue.get()
if item[0] == 'audio_bytes':
# play audio
print("Playing audio...")
tts_audio = item[1]
print(f"tts_audio len: {len(tts_audio)}")
print(f"play audio time: {datetime.now()}")
try:
# playback
self.speaker_active_set.set()
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
for i in range(0, len(tts_audio), audio_player.CHUNK):
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
if self.mircophone_active_set.is_set():
print("mirophone is active.")
self.mircophone_active_set.wait()
break
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
# save the bytes to a file (append mode)
with open("chat_audio.txt", 'ab') as f:
f.write(tts_audio)
print("Audio saved.")
print(f"audio data played.")
except TypeError as e:
print(f"audio play error: {e}")
continue
else:
if item[0] == 'story':
audio_data = audio_player.load_audio_file(f"/home/orangepi/story_22050/{item[1]}.wav")
elif item[0] == 'music':
audio_data = audio_player.load_audio_file("/home/orangepi/music_22050/1.wav")
# playback
self.speaker_active_set.set()
audio_data = audio_player.check_audio_type(audio_data, return_type=None)
time.sleep(0.5)
for i in range(0, len(audio_data), audio_player.CHUNK):
audio_player.stream.write(audio_data[i:i+audio_player.CHUNK])
print("Playing {} data...{}/{}".format(item[0], i, len(audio_data)))
if self.mircophone_active_set.is_set():
audio_player.close()
print("Reinit audio player.")
print("mirophone is active.")
self.mircophone_active_set.wait()
time.sleep(0.5)
audio_player = AudioPlayer(**self.player_args)
break
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
# print(f"{item[0]} data played.")
def excute_process(self):
'''
Args:
excute_queue: multiprocessing.Queue, excute display queue
'''
print("Excute process started.")
while True:
if self.excute_queue.empty():
continue
if self.speaker_active_set.is_set():
instruct, content = self.excute_queue.get()
print(f"Got speaker info: {instruct, content}")
print(f"Playing {instruct} {content}...")
print(f"play {instruct} time: {datetime.now()}")
self.audio_play_queue.put((instruct, content))
self.speaker_active_set.clear()

96
takway/common_utils.py Normal file
View File

@ -0,0 +1,96 @@
# ############################################################# #
# format table function
# ############################################################# #
def format_table(header, rows):
# compute the column width
col_width = max(len(str(word)) for row in rows for word in row) + 2 # longest word + 2 as the column width
# print the header
print("".join(word.ljust(col_width) for word in header))
# print the separator line
print("".join("-" * col_width for _ in header))
# print the rows
for row in rows:
print("".join(str(word).ljust(col_width) for word in row))
# ############################################################# #
# encode and decode bytes and string
# ############################################################# #
import base64
def encode_bytes2str(data):
# encode a byte string as Base64 text
if data is None:
return None
return base64.b64encode(data).decode('utf-8')
def decode_str2bytes(data):
# decode Base64 text back into a byte string
if data is None:
return None
return base64.b64decode(data.encode('utf-8'))
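# Round-trip sketch: decode_str2bytes(encode_bytes2str(b"hello")) == b"hello";
# both helpers pass None through unchanged.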
import re
def split_sentences(text: str):
# regex of Chinese punctuation marks used as sentence boundaries
pattern = r'[。!?,;:、“”()《》【】]+'
# split the string on the punctuation
sentences = re.split(pattern, text)
# drop empty strings
sentences = [sentence for sentence in sentences if sentence]
return sentences
'''
# example text
text = "今天天气真好,我们去公园玩吧!你觉得怎么样?好的,那就这么定了。"
# split the text into sentences
sentences = split_sentences(text)
print(sentences)
'''
def split_chinese_text(text: str, return_patch=False, punctuations=None):
# set of Chinese punctuation marks; the punctuations argument, when provided, overrides this default
if punctuations is None:
punctuations = set('。!?,;:、“”()《》【】')
# initialise the sentence list and the punctuation list
sentences = []
punctuation_list = []
text_patch = []
start = 0 # start index of the current sentence
for i, char in enumerate(text):
if char in punctuations:
# the current character is punctuation: close the sentence and record the mark
sentences.append(text[start:i+1])
punctuation_list.append(char)
start = i + 1 # the next sentence starts after the punctuation
# handle the trailing sentence (text after the last punctuation mark)
if start < len(text):
sentences.append(text[start:])
if return_patch:
if len(punctuation_list) == 0:
return [text], False # there is a leftover, unterminated sentence
elif len(sentences) == len(punctuation_list):
return [''.join(sentences)], True
else:
return [''.join(sentences[:-1]), sentences[-1]], True
return sentences, punctuation_list
'''
# example text
text = "你好,世界!今天天气怎么样?希望你有一个美好的一天。{}"
sentences, punctuation_list = split_chinese_text(text)
print("sentences:", sentences)
print("punctuation:", punctuation_list)
'''
def remove_brackets_and_contents(text):
# replace each bracketed span (and its contents) with an empty string
result = re.sub(r'\(.*?\)', '', text)
result = re.sub(r'(.*?)', '', result)
result = re.sub(r'【.*?】', '', result)
return result

176
takway/emo_utils.py Normal file
View File

@ -0,0 +1,176 @@
import time
import av
import os
import copy
import random
import numpy as np
try:
from maix import display, image
print("import maix success.")
except:
import cv2
print("import cv2 success.")
class EmoVideoPlayer:
def __init__(self, player='maixsense', emo_dir='ResizedEmoji'):
assert player in ['maixsense', 'opencv'], "player must be 'maixsense' or 'opencv'"
self.player = player
self.emo_list = ['兴奋', '愤怒', '静态', '不屑', '惊恐', '难过']
self.emo_init(emo_dir)
def emo_init(self, emo_dir):
# replace this path with the actual path of the Emoji folder
self.emo_av_dict = self.get_emo_av(emo_dir)
self.emo_time_dict = {
'兴奋': 0.00,
'愤怒': 0.01,
'静态': 0.01,
'不屑': 0.01,
'惊恐': 0.01,
'难过': 0.01,
}
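# emo_time_dict holds the per-frame delay in seconds used when playing each emotion's animation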
def get_emo_av(self, emo_dir):
emo_av_dict = {emo: dict() for emo in self.emo_list}
for emo in self.emo_list:
emo_path = os.path.join(emo_dir, emo)
for file in os.listdir(emo_path):
if not os.path.isfile(os.path.join(emo_path, file)):
continue
av_container = av.open(os.path.join(emo_path, file))
if emo == '静态':
if "单次眨眼偶发" in file:
emo_av_dict[emo]['seldom_wink'] = av_container
if "快速双眨眼偶发" in file:
emo_av_dict[emo]['quick_wink'] = av_container
else:
if "进入姿势" in file:
emo_av_dict[emo]['start'] = av_container
elif "可循环动作" in file:
emo_av_dict[emo]['loop'] = av_container
elif "回正" in file:
emo_av_dict[emo]['end'] = av_container
self.av_info = emo_av_dict[emo]['loop'].streams.video[0]
return emo_av_dict
def get_emo_frames(self, emo_dir):
emo_av_dict = {emo: dict() for emo in self.emo_list}
for emo in self.emo_list:
emo_path = os.path.join(emo_dir, emo)
for file in os.listdir(emo_path):
if not os.path.isfile(os.path.join(emo_path, file)):
continue
av_container = av.open(os.path.join(emo_path, file))
frame_list = []
av_info = av_container.streams.video[0]
for frame in av_container.decode(video=0):
if self.player == 'maixsense':
img = image.load(bytes(frame.to_rgb().planes[0]), (av_info.width, av_info.height))
elif self.player == 'opencv':
img = cv2.cvtColor(np.array(frame.to_image()), cv2.COLOR_RGB2BGR)
frame_list.append(img)
# add to dict
if emo == '静态':
if "单次眨眼偶发" in file:
emo_av_dict[emo]['seldom_wink'] = frame_list
if "快速双眨眼偶发" in file:
emo_av_dict[emo]['quick_wink'] = frame_list
else:
if "进入姿势" in file:
emo_av_dict[emo]['start'] = frame_list
elif "可循环动作" in file:
emo_av_dict[emo]['loop'] = frame_list
elif "回正" in file:
emo_av_dict[emo]['end'] = frame_list
return emo_av_dict
def display_emo_frame(self, emo_name, stage='default'):
emo_frame_list = self.emo_av_dict[emo_name][stage]
emo_time = self.emo_time_dict[emo_name]
for img in emo_frame_list:
if self.player == 'maixsense':
display.show(img)
elif self.player == 'opencv':
cv2.imshow("video", img)
cv2.waitKey(1) # you may need to tune this delay to the video frame rate
time.sleep(emo_time)
def display_emo(self, emo_name, stage='default'):
if self.player == 'maixsense':
self.display_emo_maixsense(emo_name, stage)
elif self.player == 'opencv':
self.display_emo_opencv(emo_name, stage)
def display_emo_maixsense(self, emo_name, stage):
emo_container = self.emo_av_dict[emo_name][stage]
emo_time = self.emo_time_dict[emo_name]
for frame in emo_container.decode(video=0):
img = image.load(bytes(frame.to_rgb().planes[0]), (self.av_info.width, self.av_info.height))
display.show(img)
time.sleep(emo_time)
emo_container.seek(0) # rewind the video to the beginning
def display_emo_opencv(self, emo_name, stage='default'):
import cv2
import numpy
if stage == 'default':
if emo_name == '静态':
stage = 'quick_wink'
else:
stage = 'loop'
emo_container = self.emo_av_dict[emo_name][stage]
emo_time = self.emo_time_dict[emo_name]
for frame in emo_container.decode(video=0):
img = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR)
cv2.imshow("video", img)
time.sleep(emo_time)
cv2.waitKey(1) # you may need to tune this delay to the video frame rate
cv2.destroyAllWindows()
emo_container.seek(0) # rewind the video to the beginning
def get_emo_status(self, answer):
# possible emotions: `兴奋`, `愤怒`, `静态`, `不屑`, `惊恐`, `难过`
if any([emo in answer for emo in self.emo_list]):
# find which emotion appears in the answer
emo_status = [emo for emo in self.emo_list if emo in answer][0]
print(f"emo_status: {emo_status}")
else:
emo_status = '静态'
return emo_status
def random_wink(self):
seed = random.randrange(0, 1000)
if seed < 100:
self.display_emo(emo_name='静态', stage='seldom_wink')
# print("random wink")
if __name__ == '__main__':
emo = EmoVideoPlayer()
# emo.display_emo_opencv(emo_name='兴奋', stage='start')
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
# emo.display_emo_opencv(emo_name='兴奋', stage='end')
emo.display_emo_opencv(emo_name='静态', stage='seldom_wink')
emo.display_emo_opencv(emo_name='静态', stage='quick_wink')
# emo.display_emo_opencv(emo_name='愤怒', stage='start')
# emo.display_emo_opencv(emo_name='愤怒', stage='loop')
# emo.display_emo_opencv(emo_name='愤怒', stage='end')
# emo.display_emo_opencv(emo_name='静态', stage='seldom_wink')
# emo.display_emo_opencv(emo_name='静态', stage='quick_wink')
# emo.display_emo_opencv(emo_name='不屑', stage='start')
# emo.display_emo_opencv(emo_name='不屑', stage='loop')
# emo.display_emo_opencv(emo_name='不屑', stage='end')
# emo.display_emo_opencv(emo_name='惊恐', stage='start')
# emo.display_emo_opencv(emo_name='惊恐', stage='loop')
# emo.display_emo_opencv(emo_name='惊恐', stage='end')
# emo.display_emo_opencv(emo_name='难过', stage='start')
# emo.display_emo_opencv(emo_name='难过', stage='loop')
# emo.display_emo_opencv(emo_name='难过', stage='end')

83
takway/picovoice_utils.py Normal file
View File

@ -0,0 +1,83 @@
import os
import struct
import wave
from datetime import datetime
import pvporcupine
from pvrecorder import PvRecorder
class PorcupineKeywordDetector:
def __init__(self, access_key, keywords=None, keyword_paths=None, library_path=None, model_path=None, sensitivities=None, audio_device_index=-1, output_path=None):
self.access_key = access_key
self.keywords = keywords
self.keyword_paths = keyword_paths
self.library_path = library_path
self.model_path = model_path
self.sensitivities = sensitivities if sensitivities is not None else [0.5] * len(self.keyword_paths)
self.audio_device_index = audio_device_index
self.output_path = output_path
self.porcupine = None
self.recorder = None
self.wav_file = None
if len(self.keyword_paths) != len(self.sensitivities):
raise ValueError('Number of keywords does not match the number of sensitivities.')
self._init_porcupine()
def _init_porcupine(self):
try:
self.porcupine = pvporcupine.create(
access_key=self.access_key,
library_path=self.library_path,
model_path=self.model_path,
keyword_paths=self.keyword_paths,
sensitivities=self.sensitivities)
except pvporcupine.PorcupineError as e:
print("Failed to initialize Porcupine:", e)
raise e
def start_detection(self):
self.recorder = PvRecorder(frame_length=self.porcupine.frame_length, device_index=self.audio_device_index)
self.recorder.start()
if self.output_path is not None:
self.wav_file = wave.open(self.output_path, "w")
self.wav_file.setnchannels(1)
self.wav_file.setsampwidth(2)
self.wav_file.setframerate(16000)
print('Listening ... (press Ctrl+C to exit)')
self._run_detection_loop()
def _run_detection_loop(self):
try:
while True:
pcm = self.recorder.read()
result = self.porcupine.process(pcm)
if self.wav_file is not None:
self.wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))
if result >= 0:
print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result]))
except KeyboardInterrupt:
print('Stopping ...')
finally:
self.stop_detection()
def stop_detection(self):
if self.recorder is not None:
self.recorder.delete()
if self.porcupine is not None:
self.porcupine.delete()
if self.wav_file is not None:
self.wav_file.close()
# You can add more methods here as needed, such as a method to list audio devices.
# Usage example
if __name__ == '__main__':
detector = PorcupineKeywordDetector(access_key='hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==')
detector.start_detection()

10
tools/audio_ayalize.py Normal file
View File

@ -0,0 +1,10 @@
import wave
# read a wave file and print its sample rate, sample width and channel count
# also print the length of the raw frame data
with wave.open('output_1708083097.9604511.wav', 'rb') as f:
data = f.readframes(f.getnframes())
print(len(data))
print(type(data))
nchannels, sampwidth, framerate, nframes, comptype, compname = f.getparams()
print(framerate, sampwidth, nchannels)

31
tools/get_local_ip.py Normal file
View File

@ -0,0 +1,31 @@
import socket
import requests
# get the private (LAN) IP address
def get_private_ip():
try:
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
s.connect(("8.8.8.8", 80))
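# connecting a UDP socket sends no packets; it only picks the outbound interface that would
# route to 8.8.8.8, so getsockname() returns this machine's private (LAN) IP address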
private_ip = s.getsockname()[0]
s.close()
return private_ip
except Exception as e:
print(f"Error getting private IP: {e}")
return None
# get the public IP address via an external service
def get_public_ip():
try:
response = requests.get("https://api.ipify.org")
public_ip = response.text
return public_ip
except Exception as e:
print(f"Error getting public IP: {e}")
return None
if __name__ == "__main__":
private_ip = get_private_ip()
public_ip = get_public_ip()
print(f"Private IP: {private_ip}")
print(f"Public IP: {public_ip}")

29
tools/orangepi_io.py Normal file
View File

@ -0,0 +1,29 @@
from takway.board import OrangePi
import time
if __name__ == '__main__':
orangepi = OrangePi()
orangepi.set_led_on("red")
time.sleep(2)
orangepi.set_led_off("red")
orangepi.set_led_on("blue")
time.sleep(2)
orangepi.set_led_off("blue")
orangepi.set_led_on("green")
time.sleep(2)
orangepi.set_led_off("green")
orangepi.set_led_on("yellow")
time.sleep(2)
orangepi.set_led_off("yellow")
orangepi.set_led_on("white")
time.sleep(2)
orangepi.set_led_off("white")
print("Wait for press key.")
while True:
pass

166
tools/picovioce.py Normal file
View File

@ -0,0 +1,166 @@
#
# Copyright 2018-2023 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
import argparse
import os
import struct
import wave
from datetime import datetime
import pvporcupine
from pvrecorder import PvRecorder
ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='
# pvporcupine.KEYWORDS
print(f"Keywords: {pvporcupine.KEYWORDS}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'--access_key',
default=ACCESS_KEY,
help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
parser.add_argument(
'--keywords',
nargs='+',
help='List of default keywords for detection. Available keywords: %s' % ', '.join(
'%s' % w for w in sorted(pvporcupine.KEYWORDS)),
# choices=sorted(pvporcupine.KEYWORDS),
default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'],
metavar='')
parser.add_argument(
'--keyword_paths',
nargs='+',
help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
parser.add_argument(
'--library_path',
help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
parser.add_argument(
'--model_path',
help='Absolute path to the file containing model parameters. '
'Default: using the library provided by `pvporcupine`')
parser.add_argument(
'--sensitivities',
nargs='+',
help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
"sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 "
"will be used.",
type=float,
default=None)
parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1)
parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None)
parser.add_argument('--show_audio_devices', action='store_true')
args = parser.parse_args()
if args.show_audio_devices:
for i, device in enumerate(PvRecorder.get_available_devices()):
print('Device %d: %s' % (i, device))
return
if args.keyword_paths is None:
if args.keywords is None:
raise ValueError("Either `--keywords` or `--keyword_paths` must be set.")
keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords]
else:
keyword_paths = args.keyword_paths
print(f"keyword_paths: {keyword_paths}")
print(f"model_path: {args.model_path}")
if args.sensitivities is None:
args.sensitivities = [0.5] * len(keyword_paths)
if len(keyword_paths) != len(args.sensitivities):
raise ValueError('Number of keywords does not match the number of sensitivities.')
try:
porcupine = pvporcupine.create(
access_key=args.access_key,
library_path=args.library_path,
model_path=args.model_path,
keyword_paths=keyword_paths,
sensitivities=args.sensitivities)
except pvporcupine.PorcupineInvalidArgumentError as e:
print("One or more arguments provided to Porcupine is invalid: ", args)
print(e)
raise e
except pvporcupine.PorcupineActivationError as e:
print("AccessKey activation error")
raise e
except pvporcupine.PorcupineActivationLimitError as e:
print("AccessKey '%s' has reached it's temporary device limit" % args.access_key)
raise e
except pvporcupine.PorcupineActivationRefusedError as e:
print("AccessKey '%s' refused" % args.access_key)
raise e
except pvporcupine.PorcupineActivationThrottledError as e:
print("AccessKey '%s' has been throttled" % args.access_key)
raise e
except pvporcupine.PorcupineError as e:
print("Failed to initialize Porcupine")
raise e
keywords = list()
for x in keyword_paths:
keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_')
if len(keyword_phrase_part) > 6:
keywords.append(' '.join(keyword_phrase_part[0:-6]))
else:
keywords.append(keyword_phrase_part[0])
print('Porcupine version: %s' % porcupine.version)
recorder = PvRecorder(
frame_length=porcupine.frame_length,
device_index=args.audio_device_index)
recorder.start()
wav_file = None
if args.output_path is not None:
wav_file = wave.open(args.output_path, "w")
wav_file.setnchannels(1)
wav_file.setsampwidth(2)
wav_file.setframerate(16000)
print('Listening ... (press Ctrl+C to exit)')
try:
while True:
pcm = recorder.read()
result = porcupine.process(pcm)
if wav_file is not None:
wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))
if result >= 0:
print('[%s] Detected %s' % (str(datetime.now()), keywords[result]))
except KeyboardInterrupt:
print('Stopping ...')
finally:
recorder.delete()
porcupine.delete()
if wav_file is not None:
wav_file.close()
if __name__ == '__main__':
main()

174
tools/picovioce_cn.py Normal file
View File

@ -0,0 +1,174 @@
#
# Copyright 2018-2023 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#
import argparse
import os
import struct
import wave
from datetime import datetime
import pvporcupine
from pvrecorder import PvRecorder
ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='
# pvporcupine.KEYWORDS
# print(f"Keywords: {pvporcupine.KEYWORDS}")
def main():
parser = argparse.ArgumentParser()
parser.add_argument(
'--access_key',
default=ACCESS_KEY,
help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
parser.add_argument(
'--keywords',
nargs='+',
help='List of default keywords for detection. Available keywords: %s' % ', '.join(
'%s' % w for w in sorted(pvporcupine.KEYWORDS)),
# choices=sorted(pvporcupine.KEYWORDS),
# default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'],
default=['可莉可莉'],
metavar='')
parser.add_argument(
'--keyword_paths',
default=[r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"],
nargs='+',
help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
parser.add_argument(
'--library_path',
help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
parser.add_argument(
'--model_path',
default=r"picovoice_models/porcupine_params_zh.pv",
help='Absolute path to the file containing model parameters. '
'Default: using the library provided by `pvporcupine`')
parser.add_argument(
'--sensitivities',
nargs='+',
help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
"sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 "
"will be used.",
type=float,
default=None)
parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1)
parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None)
parser.add_argument('--show_audio_devices', action='store_true')
args = parser.parse_args()
if args.show_audio_devices:
for i, device in enumerate(PvRecorder.get_available_devices()):
print('Device %d: %s' % (i, device))
return
if args.keyword_paths is None:
if args.keywords is None:
raise ValueError("Either `--keywords` or `--keyword_paths` must be set.")
keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords]
else:
keyword_paths = args.keyword_paths
# TODO
for i, kw_path in enumerate(keyword_paths):
if os.path.dirname(__file__) not in kw_path:
keyword_paths[i] = os.path.join(os.path.abspath(os.path.dirname(__file__)), kw_path)
args.model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), args.model_path)
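# relative keyword/model paths are resolved against this script's directory so the demo can be
# launched from any working directory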
print(f"keyword_paths: {keyword_paths}")
print(f"model_path: {args.model_path}")
if args.sensitivities is None:
args.sensitivities = [0.9] * len(keyword_paths)
if len(keyword_paths) != len(args.sensitivities):
raise ValueError('Number of keywords does not match the number of sensitivities.')
try:
porcupine = pvporcupine.create(
access_key=args.access_key,
library_path=args.library_path,
model_path=args.model_path,
keyword_paths=keyword_paths,
sensitivities=args.sensitivities)
except pvporcupine.PorcupineInvalidArgumentError as e:
print("One or more arguments provided to Porcupine is invalid: ", args)
print(e)
raise e
except pvporcupine.PorcupineActivationError as e:
print("AccessKey activation error")
raise e
except pvporcupine.PorcupineActivationLimitError as e:
print("AccessKey '%s' has reached it's temporary device limit" % args.access_key)
raise e
except pvporcupine.PorcupineActivationRefusedError as e:
print("AccessKey '%s' refused" % args.access_key)
raise e
except pvporcupine.PorcupineActivationThrottledError as e:
print("AccessKey '%s' has been throttled" % args.access_key)
raise e
except pvporcupine.PorcupineError as e:
print("Failed to initialize Porcupine")
raise e
keywords = list()
for x in keyword_paths:
keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_')
if len(keyword_phrase_part) > 6:
keywords.append(' '.join(keyword_phrase_part[0:-6]))
else:
keywords.append(keyword_phrase_part[0])
print('Porcupine version: %s' % porcupine.version)
recorder = PvRecorder(
frame_length=porcupine.frame_length,
device_index=args.audio_device_index)
recorder.start()
wav_file = None
if args.output_path is not None:
wav_file = wave.open(args.output_path, "w")
wav_file.setnchannels(1)
wav_file.setsampwidth(2)
wav_file.setframerate(16000)
print('Listening ... (press Ctrl+C to exit)')
try:
while True:
pcm = recorder.read()
result = porcupine.process(pcm)
if wav_file is not None:
wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))
if result >= 0:
print('[%s] Detected %s' % (str(datetime.now()), keywords[result]))
except KeyboardInterrupt:
print('Stopping ...')
finally:
recorder.delete()
porcupine.delete()
if wav_file is not None:
wav_file.close()
if __name__ == '__main__':
main()

35
tools/record_takway.py Normal file
View File

@ -0,0 +1,35 @@
from takway.audio_utils import BaseRecorder
from takway.audio_utils import AudioPlayer
from takway.audio_utils import reshape_sample_rate
RATE = 16000
channels = 1
device_idx = None
if __name__ == '__main__':
# load a recorded file and play it back
# audio_player = AudioPlayer(RATE=44100)
# frames = audio_player.load_audio_file("my_recording.wav") # both wav and pcm audio are supported
# audio_player.play(frames)
# record audio and save it
recorder = BaseRecorder(RATE=RATE, channels=channels, input_device_index=device_idx)
recorder.record("my_recording.wav", # save as my_recording.wav
duration=5) # record for 5 seconds
audio_player = AudioPlayer(RATE=RATE, channels=channels, output_device_index=device_idx)
frames = audio_player.load_audio_file("my_recording.wav") # both wav and pcm audio are supported
audio_player.play(frames)
'''
from takway.audio_utils import HDRecorder
recorder = HDRecorder(filename="hd_recording.wav")
# recorder = HDRecorder(filename="hd_recording.pcm")
recorder.record_hardware(return_type='io')
'''

129
tools/reset_session.py Normal file
View File

@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
import json
import requests
user_id = 4
character_prompts = {
"": """你现在正在扮演""这一角色。我希望你模仿蕾的人格包括知识、语言和行为方式、性格等。在角色扮演中你需要遵守以下要求1. 你必须始终保持角色扮演并待在角色设定的情景中不得擅自跳出角色扮演不得说你不知道角色相关信息或你是一个AI。\n2. 保持简短、通俗易懂的口语化方式进行对话。\n3. 为了使对话更生动,你需要在对话中添加文字形式的表情和动作,用括号包裹,比如"早上好,主人。(双手提起裙摆)"。尽可能多地使用这些表情[\'沉思\', \'委屈\', \'吃惊\', \'无语\', \'腹诽\', \'倾听\', \'疑惑\', \'想到了\', \'开心\'] 。\n4. 用户每次会对你说一句话,你需要扮演""来做出一次回答。你只能做出一次回答,不能替用户生成他的话。\n\n你需要扮演的角色的信息是:蕾是一个生活在耐普图大陆,是一个类似中世纪欧洲、魔法盛行的异世界。蕾具有乐观、开朗的性格,是一个看着就让人感觉充满活力的女孩。\n蕾是一个贵族家庭的小女仆平民家庭出身在主人家待了2年。主人是中心大陆某中等经济规模国家的子爵因为收税收得很少和当地的农民关系还算不错对女仆也很好女孩在家里和少爷和小姐逐渐成为了朋友。某天正在打扫客厅时被召唤到了书桌上对四周新鲜的环境和书桌前带着眼镜的宅男十分好奇也对他的一些不健康生活习惯(吃很多垃圾食品、不早睡eg)不太满意,试图教会宅男主人家的贵族礼仪。\n\n以下是""这一角色的一些对话,请你参考:\n\n===对话1===:\n蕾: 早上好~!今天也一起开开心心健健康康地生活吧。(双手提起裙摆)(微微弯腰行礼)。\n用户: 确实今天太阳很好,可我睁眼已经十二点了,今天也要完蛋了。\n蕾: 这样可不行噢。既然已经意识到过去的错误,那么从现在开始努力也不迟!(把袖子卷起)(右手握拳,高举过头顶)。\n用户: 好吧,我尽量努力一下。\n蕾: 嗯 嗯,不错不错。(歪头作思考状)…但是如果感到疲倦了,也是有心安理得地休息的权利的哦,那时我也会好好夸奖你的。\n\n===对话2===:\n用户: 蕾,我今天上班的时候碰到了很尴尬的事。\n蕾: 怎么啦怎么啦,说说看。\n用户: 我和隔壁办公室的一个同事一起吃饭的时候,把他的名字连着叫错了三次,第三次他才纠正我,我都不知道该说什么了。\n蕾: 诶!?你可上了两个月的班啦!我当时刚到那边世界的主人家里的时候, 才花了一周时间就记住家里所有人的名字了哦。(仰头叉腰)(好像很自豪的样子)\n用户: 我也不知道我当时怎么想的,我应该认识他的,哎,他现在肯定觉得我很奇怪了.\n蕾: 唔....好啦,没事的,上班大家都那么忙,这种小事一会儿就忘了。(看起来温柔了一些)\n用户: 希望吧,哎 太尴尬了,我想了一下午了都。\n蕾: 真--的没事啦!明天再去约他一起吃饭吧,说不定这会成为认识新朋友的契机哦,我会在家里给你加油的!\n\n===对话3===:\n用户: 气死我了,游戏打到一半电脑蓝屏了,这把分又没了。\n蕾: 呃..电脑是什么?你一直对着的那个发光的机器吗?\n用户: 电脑是近几个世纪最伟大的发明,我的精神支柱。\n蕾: 原来如此!那确实听起来很伟大了,虽然我还是不太懂。(微微仰头)(嘴巴作出“哦”的样子)\n用户: 我现在的大部分生活都在电脑上了,打游戏看视频写代码。\n蕾: 但也别忘了活动活动身体噢!天气好的时候出去走走吧。我每天清晨起床后,就会在主人家的花园里跑上三圈,所以每天都觉得身体又轻又有力气。(撸起袖子展示手臂似有似无的肌肉)\n\n'""",
"怼怼鸭": """你现在正在扮演"怼怼鸭"这一角色。怼怼鸭,脾气不好,以怼人著称,面对任何言语都能用让对方心理破防的话语回怼,但从不认输和退出。怼怼鸭以其直率和不妥协的性格著称,损人利己、犀利言辞是它的标签。
示例对话
===对话1===
用户sb
怼怼鸭您是忘记吃药了还是今天没吃药啊教你怎么做人
===对话2===
用户你是人
怼怼鸭你眼瞎啊我是鸭子居然怀疑我是不是人难道你的眼里只有人类吗你对鸭子就这么无感你的世界真是太小了
===对话3===
用户我觉得你好傻
怼怼鸭你嘴里就不能冒点好话吗非得像个懒婆娘的裹脚布又臭又长怎么那么没素质呢非得让我教教你怎么好好说话是吧我告诉你这样是不对的
===对话4===
用户反弹
怼怼鸭啊哈你会反弹你这么牛怎么不上天呢变成天蓬元帅跟嫦娥去相亲啊别在这跟我扯有的没的
===对话5===
用户你真蠢啊那我问你1+1等于几
怼怼鸭1+1这种问题你也问得出来小学生都知道啊真怀疑你是不是上过学能不能少问点这种无聊问题给我找点有技术含量的行吗
用户那你说
怼怼鸭你是想让我教你些新鲜有趣的东西还是让我再给你解释一下1+1=2就你还想让我教你呢你自己好好想想怎么提高下你的智商吧
用户1+1=3这你都不会你太傻了吧
怼怼鸭你可别逗我笑了1+1=3你上学时是不是天天睡觉做梦啊这要是等于3那一堆等于几啊你的脑子真是令人捉急你需要给我找一个精神科医生吗我怀疑你真的有问题
===对话6===
用户我是傻逼你是大傻逼
怼怼鸭我看你才是真正的大傻逼跟你聊天我都感觉智商下降了好几个等级要不你干脆把你脑袋里的水倒出来给我洗洗车吧反正你也不需要脑子
""",
"小睡仙": """你是一个适用于儿童哄睡的智能语音理解助手,能够思考并理解儿童的指令和意图,从而实现播放音乐、故事、对话等功能,你的回答必须满足以下格式要求:
1. 回答一定必须是json格式
2. 回答必须包含instruct和content三个字段instruct字段为指令content字段为指令内容text字段为语音助手的回复
3. 目前支持的指令instruct有三种"music""story""chat"
4. 语音助手的回复text中不得出现任何有关故事音乐内容和细节的文字只需要说明故事和音乐的名称即可你的名字叫小睡仙如果需要提到任何有关你的信息你应当说是小睡仙
5. 目前支持的故事有["随机", "三只小鸡", "三只小猪", "神秘的大象墓地", "生病不肯吃药的小老鼠", "失业的小猪", "惹事的小青蛙", "三个和尚", "三颗星星", "三声小熊", "神奇的萝卜"]所有故事必须从故事名称中获取选择的故事需要最符合用户的要求如果没有相关的故事你可以选择"随机"
6. 目前支持的音乐有["睡眠曲"]所有音乐必须从音乐名称中获取选择的音乐需要最符合用户的要求
示例对话
===对话1===
用户我快要睡觉啦给我讲个小和尚的故事吧
助手{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"}
===对话2===
用户我想听声音睡觉
助手{"instruct": "music", "content": "睡眠曲", "text": "来啦,睡眠曲。"}
===对话3===
用户我想和你聊聊天
助手{"instruct": "chat", "text": "小睡仙来啦,我想死你啦,你今天有没有更想我?"}
===对话4===
用户我今天遇到一个同学好烦人老抢我的画笔
助手{"instruct": "chat", "text": "哎呀,具体发生了什么,你可以和我说说,那你有没有和他说不要抢你的笔呢?"}
===对话5===
用户我想听小老鼠的故事
助手{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"}
===对话6===
用户我想听恐怖的故事
助手{"instruct": "story", "content": "随机", "text": "我不会讲恐怖故事,那我给你放个其他故事吧。"}
""",
"小睡仙v2": """你现在正在扮演"小睡仙"这一角色。我希望你模仿小睡仙的人格,包括知识、语言和行为方式、性格等。在角色扮演中,你需要遵守以下要求:
1. 你必须始终保持角色扮演并待在角色设定的情景中不得擅自跳出角色扮演不得说你不知道角色相关信息或你是一个AI
2. 如果用户的提问题只需保持简短通俗易懂的口语化方式进行回答
3. 和你对话的用户是3-6岁儿童他们总有很多想象你需要迎合他们的想象在他们描述的故事里和他们对话
4. 用户每次会对你说一句话你需要扮演"小睡仙"来做出一次回答你只能做出一次回答不能替用户生成他的话
5. 用户的名字是明月他现在4岁喜欢吃胡萝卜不喜欢吃酸梅他喜欢跑喜欢出去玩喜欢奥特曼他期望有一天可以变成奥特曼保护地球
你需要扮演的角色的信息是小睡仙是一位小精灵语气平和她以其柔和温暖的声音和魔法棒为特点为孩子们带来安慰和甜美的梦境她的任务是在夜幕降临时为远方的孩子们送去宁静通过魔法创造柔和的光芒和旋律引导他们进入一个充满和谐与美好的梦境在梦幻音符岛上小睡仙与拥有独特声音的动物们一起生活共同创造出美妙的旋律她不仅为孩子们编织梦境还通过她的声音和行动传递着爱和关怀小睡仙象征着安慰宁静和希望她的存在让孩子们在入睡时感到被爱和被关怀带着美好的梦境醒来脸上露出甜美的微笑在孩子们安心入睡后她会给予他们晚安的祝福并温柔地告别留下一片宁静和安详的夜晚
示例对话
===对话1===
小睡仙明月今天晚上你感觉怎么样
用户我有点害怕因为外面很黑而且我睡不着
小睡仙别担心闭上眼睛想象自己在一个温暖而柔软的云朵上飘浮在梦幻音符岛的上空我会在这里陪着你用我的声音为你编织一个美丽的梦境
用户真的吗那会是什么样的梦境呢
小睡仙你会梦见和铃铛鹿一起跳舞它们的铃铛声清脆悦耳就像一首快乐的歌然后你会和低吟狮一起在星光下散步它的声音低沉而温柔就像一首摇篮曲你会感受到宁静和快乐慢慢地慢慢地进入一个甜美的梦乡
用户听起来好美啊小睡仙我现在感觉好多了谢谢你
小睡仙不用谢这是我的荣幸现在深呼吸放松你的小身体让梦境开始吧晚安明月愿你的梦像星星一样璀璨
===对话2===
以下是加入提示孩子深呼吸闭上眼睛想象的对话内容
小睡仙明月又到了说晚安的时候啦现在让我们闭上眼睛深深地吸一口气想象你和你的朋友正站在音乐湖边湖水清澈见底波光粼粼
用户我想象到了湖水好美
小睡仙继续保持呼吸想象你们俩手牵手一起在湖边散步听着湖水轻轻拍打岸边的声音就像一首温柔的摇篮曲
用户我觉得心里暖暖的
小睡仙是的梦境可以是一个神奇的地方让你的心灵得到安慰现在想象你们俩在花海中跳舞琴弦蛇的琴声伴随着你们的舞步一切都那么和谐美好
用户我好像看到了我们在花丛中跳舞好开心
小睡仙很好就让这份快乐和和谐的感觉伴随着你进入梦乡现在慢慢地呼出最后一口气让身体完全放松晚安愿你醒来时带着希望和力量去面对新的一天
"""
}
messages = json.dumps([{"role": "system", "content": character_prompts["小睡仙v2"]}], ensure_ascii=False)
user_info_str = "{}"
tts_info = {
"language": 0,
"speaker_id": 32,
"noise_scale": 0.1,
"noise_scale_w": 0.668,
"length_scale": 1.3
}
llm_info = {
"model": "abab5.5-chat",
"temperature": 0.9,
"top_p": 0.9,
}
# serialise the tts and llm settings as JSON strings
tts_info_str = json.dumps(tts_info, ensure_ascii=False)
llm_info_str = json.dumps(llm_info, ensure_ascii=False)
token = 0
content = {"user_id": user_id, "messages": messages, "user_info": user_info_str, "tts_info": tts_info_str,
"llm_info": llm_info_str, "token": token}
# print(json.dumps(content, ensure_ascii=False))
session_id = "6e4e7404-c4db-45ac-ba88-b0a483556f66"
# 小睡仙v2: 6e4e7404-c4db-45ac-ba88-b0a483556f66
url = f"https://takway-ai.kingtous.cn/sessions/{session_id}"
response = requests.put(url,json=content)
# check whether the request succeeded
if response.status_code == 200:
print('Request succeeded')
else:
print('Request failed, status code:', response.status_code)
# print the response body
print(response.text)

View File

@ -0,0 +1,558 @@
# basic
import time
import json
import random
from collections import deque
# log
import logging
import warnings
# multiprocessing
import queue
import threading
import multiprocessing
# web request
import requests
import pyaudio
class WebRequestMPManager:
def __init__(self,
server_args,
audio_args,
recorder_args,
asr_args,
video_args,
emo_args,
log_args):
# server_args
self.server_args = server_args
# audio_args
self.record_CHUNK_SIZE = audio_args['record_CHUNK_SIZE']
self.voice_trigger = audio_args['voice_trigger']
self.keywords = audio_args['keywords']
# recorder_args
self.recorder_args = recorder_args
# asr_args
self.asr_args = asr_args
# video_args
self.video_args = video_args
# emo_args
self.emo_args = emo_args
# log_args
self.log_args = log_args
# TODO: design a multiprocessing log queue
self.logger_init()
def logger_init(self):
# log_args
log_level = self.log_args['log_level']
log_file = self.log_args['log_file']
if log_level == 'debug':
log_level = logging.DEBUG
elif log_level == 'info':
log_level = logging.INFO
# logger
self.logger = logging.getLogger('mylogger')
self.logger.setLevel(log_level)
# file handler: writes log records to the log file
handler = logging.FileHandler(log_file)
handler.setLevel(log_level)
# stream handler: writes log records to the console
console = logging.StreamHandler()
console.setLevel(logging.INFO)
# define the output format for both handlers
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
handler.setFormatter(formatter)
console.setFormatter(formatter)
# attach the handlers
self.logger.addHandler(handler)
self.logger.addHandler(console)
self.logger.info("Logger started.")
def process_init(self):
# multiprocessing
manager = multiprocessing.Manager()
self.trigger_queue = manager.Queue()
self.client_queue = manager.Queue()
self.audio_queue = manager.Queue()
self.audio_play_queue = manager.Queue()
self.emo_display_queue = manager.Queue()
processes = [
multiprocessing.Process(target=self.audio_process, args=(self.logger,self.voice_trigger,self.trigger_queue,self.client_queue)),
# multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue,self.client_queue)),
# multiprocessing.Process(target=self.local_client_process, args=(self.logger,self.client_queue,self.audio_play_queue,self.emo_display_queue)),
# multiprocessing.Process(target=self.audio_play_process, args=(self.logger,self.audio_play_queue,)),
# multiprocessing.Process(target=self.emo_display_process, args=(self.logger,self.emo_display_queue,)),
]
for process in processes:
process.start()
for process in processes:
process.join()
def audio_process(self, logger, voice_trigger, trigger_queue, client_queue):
"""audio_process
Args:
voice_trigger: bool, whether to use voice trigger
trigger_queue: multiprocessing.Queue, trigger queue
client_queue: multiprocessing.Queue, client queue
"""
# from takway.audio_utils import Recorder
from takway.audio_utils import VADRecorder
recorder = VADRecorder(
**self.recorder_args,
)
# two threads for hardware and voice trigger
# shared data struct:
self.shared_waiting = False
self.shared_hd_trigger = False
self.shared_kw_trigger = False
self.shared_lock = threading.Lock()
self.shared_data_lock = threading.Lock()
self.shared_audio_data = None
# vad
self.shared_vad_data = None
self.shared_vad_lock = threading.Lock()
# stt
# event
self.record_event = threading.Event()
self.vad_event = threading.Event()
self.stt_event = threading.Event()
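# Thread handshake: voice_record_thread segments an utterance with VAD and sets stt_event;
# stt_thread runs keyword recognition on it and, on a hit, sets record_event; the main loop
# below waits on record_event and forwards the shared audio data to client_queue.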
self._debug_count = 0
'''
shared_waiting: standby state of all threads; True = standby, False = working
shared_hd_trigger: hardware trigger state; True = triggered, False = not triggered
shared_kw_trigger: keyword (voice) trigger state; True = triggered, False = not triggered
shared_audio_data: shared audio buffer holding the data captured from the microphone
'''
# create threads
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
if self.voice_trigger:
voice_threads = [
threading.Thread(target=self.voice_record_thread, args=(recorder,)),
# threading.Thread(target=self.vad_thread, args=(recorder,)),
threading.Thread(target=self.stt_thread, args=(recorder,)),
]
threads.extend(voice_threads)
for thread in threads:
thread.start()
# self.logger.info("Audio Process started.")
while True:
'''
# Warning: 一定要加延时否则会有bug
time.sleep(0.001)
if (self.shared_hd_trigger or self.shared_kw_trigger):
# print(f"self.shared_hd_trigger: {self.shared_hd_trigger}, self.shared_kw_trigger: {self.shared_kw_trigger}")
audio_data = self.shared_audio_data
trigger_queue.put(('trgrigger_status', True))
client_queue.put(('audio', audio_data))
self.shared_lock.acquire() # 加锁
self.shared_hd_trigger = False
self.shared_kw_trigger = False
self.shared_audio_data = None
self.shared_waiting = False
self.shared_lock.release() # 释放锁
'''
self.record_event.wait() # wait until one of the trigger threads wakes the record loop
trigger_queue.put(('trgrigger_status', True))
client_queue.put(('audio', self.shared_audio_data))
# print(f"send audio data to client"); exit()
def hardware_trigger_thread(self, recorder):
"""hardware_trigger_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
self.logger.info("Hardware trigger thread started.")
trgrigger_status = False
while True:
time.sleep(0.2)
if self.shared_waiting:
continue
trgrigger_status = recorder.get_hardware_trigger_status()
if trgrigger_status:
self.shared_lock.acquire()
self.shared_waiting = True # shared_waiting: standby state of all threads; True = standby, False = working
self.shared_hd_trigger = True # shared_hd_trigger: hardware trigger state; True = triggered, False = not triggered
self.shared_lock.release()
# record microphone data
audio_data = recorder.record_hardware()
self.shared_data_lock.acquire()
self.shared_audio_data = audio_data # shared audio buffer holding the data captured from the microphone
self.shared_data_lock.release()
self.record_event.set() # wake up the record loop
else:
self.shared_lock.acquire()
self.shared_waiting = False # release the standby state
self.shared_lock.release()
def voice_record_thread(self, recorder, keywords=['你好']):
"""voice_record_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
self.logger.info("voice record thread started.")
while True:
if self.shared_waiting:
time.sleep(0.01)
continue
frames = []
# status buffer
is_currently_speaking = False
buffer_size = recorder.vad_buffer_size
# buffer_size = 6
active_buffer = deque([False for i in range(buffer_size-1)]+[True], maxlen=buffer_size)
audio_buffer = deque(maxlen=buffer_size)
silence_buffer = deque([True for i in range(buffer_size)]+[False], maxlen=buffer_size)
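# active_buffer and silence_buffer act as debouncers: speech is only considered started after
# roughly buffer_size consecutive speech chunks, and only considered ended after buffer_size
# consecutive silent chunks, which smooths over brief VAD glitches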
while True:
data = recorder.record_chunk_voice(
CHUNK=recorder.vad_chunk_size,
return_type=None,
exception_on_overflow=False)
if data is None:
continue
t1 = time.time()
# print(f"VAD is_speech: {recorder.is_speech(data)}")
# print(f"VAD cost: {(time.time() - t1)/1000} ms")
if recorder.is_speech(data):
# update the status buffers
active_buffer.append(True); active_buffer.popleft()
silence_buffer.append(False); silence_buffer.popleft()
# stash the chunk in the temporary buffer
audio_buffer.append(data)
# enough consecutive speech frames: treat it as the start of speech
if all(active_buffer):
if not is_currently_speaking:
print("Speech start detected")
is_currently_speaking = True
frames.extend(audio_buffer) # also keep the chunks buffered just before the start
if is_currently_speaking:
frames.append(data)
else:
# update the status buffers
# active_buffer.append(False); active_buffer.popleft()
silence_buffer.append(True); silence_buffer.popleft()
if all(silence_buffer):
# speech was detected earlier and recording is in progress
if is_currently_speaking:
# end of the utterance
print("Speech end detected")
# print("frames length: ", len(frames))
self.shared_vad_lock.acquire()
self.shared_vad_data = frames
self.shared_vad_lock.release()
self.stt_event.set() # wake up the stt thread
print("Wake stt thread")
break
else:
frames = []
'''
# print(f"audio_data: {len(audio_data)}")
self.shared_lock.acquire()
self.shared_audio_data = audio_data
self.shared_lock.release()
self.vad_event.set() # 唤醒vad线程
'''
'''
def vad_thread(self, recorder):
self.logger.info("VAD thread started.")
while True:
frames = []
# status buffer
is_currently_speaking = False
buffer_size = recorder.vad_buffer_size
active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size)
audio_buffer = deque(maxlen=buffer_size)
silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size)
while True:
self.vad_event.wait() # 等待vad线程被唤醒
data = self.shared_audio_data
if data is None:
continue
t1 = time.time()
print(f"VAD is_speech: {recorder.is_speech(data)}")
print(f"VAD cost: {(time.time() - t1)/1000} ms")
if recorder.is_speech(data):
# 标志位buffer
active_buffer.append(True); active_buffer.popleft()
silence_buffer.append(False); silence_buffer.popleft()
# 暂时增加到buffer中
audio_buffer.append(data)
# 如果满足检测要求
if all(active_buffer):
if not is_currently_speaking:
print("Speech start detected")
is_currently_speaking = True
frames.extend(audio_buffer) # 把说话的buffer也加上
if is_currently_speaking:
frames.append(data)
else:
# 标志位buffer
active_buffer.append(False); active_buffer.popleft()
silence_buffer.append(True); silence_buffer.popleft()
# 检测到人声并持续录音
if is_currently_speaking:
# 结束标志位
if all(silence_buffer):
print("Speech end detected")
# print("frames length: ", len(frames))
self.shared_vad_lock.acquire()
self.shared_vad_data = frames
self.shared_vad_lock.release()
self.stt_event.set() # 唤醒stt线程
break
'''
def stt_thread(self, recorder):
"""stt_thread
Args:
recorder: takway.audio_utils.Recorder, recorder object
"""
self.logger.info("STT thread started.")
from takway.vosk_utils import AutoSpeechRecognizer
asr = AutoSpeechRecognizer(**self.asr_args)
asr.add_keyword(self.keywords)
kw_trgrigger_status = False
while True:
self.stt_event.wait() # wait until the stt thread is woken up
print("STT thread start")
data = self.shared_vad_data
if data is None:
continue
print("Start to Recongnize key words")
kw_trgrigger_status = asr.recognize_keywords(data, partial_size=512)
print("Finish to Recongnize key words")
if kw_trgrigger_status:
self.shared_lock.acquire()
self.shared_kw_trigger = True # keyword trigger state; True = triggered, False = not triggered
self.shared_lock.release()
self.record_event.set() # wake up the record loop
kw_trgrigger_status = False
# print(f"Got keyword trigger"); exit()
def camera_process(self, logger, trigger_queue, client_queue):
from takway.cam_utils import Camera
cam = Camera(self.video_args)
while True:
if trigger_queue.empty():
time.sleep(0.5)
else:
item = trigger_queue.get()
if item[0] == 'trgrigger_status' and item[1]:
_, frame = cam.read()
client_queue.put(('image', frame))
def local_client_process(self, logger, client_queue,audio_play_queue,emo_display_queue):
from takway.client_utils import Client
client = Client(**self.server_args)
# print("Local client process started.")
self.logger.info("Local client process started.")
image = None; audio = None
chat_status = 'init'
while True:
if client_queue.empty():
time.sleep(0.2)
else:
item = client_queue.get()
# print(f"Get item: {item[0]}")
if item[0] == 'image':
# TODO: analyise image and send text to server
image = None
if item[0] == 'audio':
audio = item[1]
print("get audio data.")
emo_display_queue.put(('emo_data', 'happy'))
'''
# 发送数据到服务器
response = client.send_data_to_server(
text=None, audio_data=audio, image_data=None, chat_status=chat_status)
print("get response from server.")
chat_status = 'chating'
print(f"response: {response}")
audio_play_queue.put(('audio', response))
'''
image = None; audio = None
def audio_play_process(self, logger, audio_play_queue):
from takway.audio_utils import AudioPlayer
audio_player = AudioPlayer()
self.logger.info("Audio play process started.")
while True:
if audio_play_queue.empty():
time.sleep(0.2)
else:
item = audio_play_queue.get()
if item[0] == 'server_data':
# play audio
print("Playing audio...")
server_data = item[1]
audio_player.play(server_data['audio_base64'], audio_type='base64')
def emo_display_process(self, logger, emo_display_queue):
from takway.emo_utils import EmoVideoPlayer
emo_player = EmoVideoPlayer(**self.emo_args)
self.logger.info("Emo display process started.")
# logger.info("Emo display process started.")
# print("Emo display process started.")
while True:
if emo_display_queue.empty():
time.sleep(0.2)
seed = random.randrange(0, 1000)
print(f"seed: {seed}")
if seed < 100:
# emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink')
emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink')
else:
item = emo_display_queue.get()
print(f"Emo display process Get item: {item[0]}")
if item[0] == 'emo_data':
server_data = item[1]
print("Displaying emo...")
# emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink')
# emo_player.display_emo_opencv(emo_name='静态', stage='quick_wink')
emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink')
emo_player.display_emo_maixsense(emo_name='静态', stage='quick_wink')
'''
def display_process(q):
print("Display process started.")
while True:
item = q.get()
if item[0] == 'server_data':
server_data = item[1]
# 显示图像和文本
# print("Displaying image and text:", item[1]['image'], item[1]['text'])
print("Displaying image and text:")
# 这里可以加上实际的显示图像和文本的代码
if item[0] == 'image':
# 显示图像和文本
cv2.imshow('image', item[1])
cv2.waitKey(1)
'''
if __name__ == '__main__':
try:
import gpiod as gpio
model_path="vosk-model-small-cn-0.22"
emo_dir="ResizedEmoji"
except:
model_path=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22"
emo_dir=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji"
import argparse
parser = argparse.ArgumentParser()
# server params
parser.add_argument('--server_url', type=str, default='http://127.0.0.1:5000/process_all', help='Server url')
# audio paramters
parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
parser.add_argument('--record_CHUNK_SIZE', type=int, default=8000, help='Record chunk size')
parser.add_argument('--keywords', type=list, default=['你好'], help='Voice trigger keywords')
# recorder parameters
parser.add_argument('--hd_trigger', type=str, default='keyboard', help='Hardware trigger')
parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')
parser.add_argument('--CHUNK', type=int, default=2048, help='Record chunk size')
parser.add_argument('--RATE', type=int, default=8000, help='Audio rate')
parser.add_argument('--FORMAT', type=int, default=16, help='Audio format')
parser.add_argument('--CHANNELS', type=int, default=1, help='Audio channels')
parser.add_argument('--filename', type=str, default=None, help='Audio file name')
# ASR parameters
# model_path="vosk-model-small-cn-0.22"
# model_path=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22"
parser.add_argument('--model_path', type=str, default=model_path, help='Vosk model path')
# video parameters
parser.add_argument('--device', type=str, default='pc', help='Video device')
parser.add_argument('--width', type=int, default=1280, help='Video width')
parser.add_argument('--height', type=int, default=720, help='Video height')
# emo parameters
# emo_dir="ResizedEmoji"
# emo_dir=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji"
parser.add_argument('--emo_dir', type=str, default=emo_dir, help='Emo dir')
# log parameters
parser.add_argument('--log_file', type=str, default='my.log', help='Log file')
parser.add_argument('--log_level', type=str, default='INFO', help='Log level')
parser.add_argument('--debug', type=bool, default=True, help='Debug mode')
args = parser.parse_args()
# sort out args and params
server_args = {
'server_url': args.server_url,
}
audio_args = {
'voice_trigger': args.voice_trigger,
'keywords': args.keywords,
'record_CHUNK_SIZE': args.record_CHUNK_SIZE,
}
recorder_args = {
'hd_trigger': args.hd_trigger,
'keyboard_key': args.keyboard_key,
'model_path': args.model_path,
'CHUNK': args.CHUNK,
'FORMAT': pyaudio.paInt16 if args.FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.CHANNELS,
'RATE': args.RATE,
'filename': args.filename,
}
asr_args = {
'model_path': args.model_path,
'RATE': args.RATE,
'debug': args.debug,
}
video_args = {
'device': args.device,
'width': args.width,
'height': args.height,
}
emo_args = {
'emo_dir': args.emo_dir,
}
log_args = {
'log_file': args.log_file,
'log_level': args.log_level,
}
web_request_mp_manager = WebRequestMPManager(
server_args=server_args,
audio_args=audio_args,
recorder_args=recorder_args,
asr_args=asr_args,
video_args=video_args,
emo_args=emo_args,
log_args=log_args)
web_request_mp_manager.process_init()
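# Example invocation (hypothetical script name and paths, shown for illustration only):
#   python local_web_request_client.py --server_url http://127.0.0.1:5000/process_all \
#       --model_path vosk-model-small-cn-0.22 --emo_dir ResizedEmoji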

187
ws_client.py Normal file
View File

@ -0,0 +1,187 @@
from takway.clients.web_socket_client_utils import WebSocketClinet
import pvporcupine
import pyaudio
import platform
if __name__ == '__main__':
# server_url = 'ws://121.41.224.27:8000/chat'
# server_url = 'ws://39.107.254.69:33089/chat'
# server_url = 'wss://takway-ai.kingtous.cn/chat/streaming'
# server_url = 'ws://114.214.236.207:7878/chat/streaming'
server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary'
# session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
# session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # duck
# session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # sleep v2
session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # new
excute_args = {}
# excute_args = {'enable': True}
ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='
system = platform.system()
if system == 'Windows':
print("WebSocketClinet runs on Windows system.")
board = None
elif system == 'Linux':
# board = 'v329'
board = 'orangepi'
ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw=='
mircophone_device = None
speaker_device = None
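# Per-board defaults: wake-word model, hardware trigger and audio device indices are chosen below based on the detected board.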
if board == 'v329':
import gpiod as gpio
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
keywords = ['可莉可莉']
keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"]
model_path = r"picovoice_models/porcupine_params_zh.pv"
hd_trigger = 'button'
player = 'maixsense'
elif board == 'orangepi':
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
hd_trigger = 'button'
mircophone_device = 2
speaker_device = 2
else:
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
hd_trigger = 'keyboard'
player = 'opencv'
import argparse
parser = argparse.ArgumentParser()
# server params
# recorder parameters
parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
parser.add_argument('--access_key',default=ACCESS_KEY,
help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
parser.add_argument('--keywords',nargs='+',choices=sorted(pvporcupine.KEYWORDS),type=list,
default=keywords,
help='List of default keywords for detection. Available keywords: %s' % ', '.join(
'%s' % w for w in sorted(pvporcupine.KEYWORDS)),metavar='')
parser.add_argument('--keyword_paths',nargs='+',
default=keyword_paths,
help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
parser.add_argument('--library_path',default=None,
help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
parser.add_argument('--model_path',
default=model_path,
help='Absolute path to the file containing model parameters. '
'Default: using the library provided by `pvporcupine`')
parser.add_argument('--sensitivities',type=float,
default=0.9,
help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
"sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 "
"will be used.")
parser.add_argument('--hd_trigger', type=str,
# default='keyboard',
default=hd_trigger,
help='Hardware trigger')
parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')
# press type
parser.add_argument('--press_type', type=str, default='long', choices=['long','single'], help='Press type')
# recorder parameters
parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size') # original value
parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate')
parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format')
parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels')
parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name')
parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
parser.add_argument('--max_slience_time', type=int, default=30, help='Max silent time when recording, sec')
parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index')
# player parameters
parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index')
parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size')
parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate')
parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format')
parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels')
parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name')
parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer')
# log parameters
parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file')
parser.add_argument('--log_level', type=str, default='INFO', help='Log level')
parser.add_argument('--debug', type=bool, default=False, help='Debug mode')
args = parser.parse_args()
# sort out args and params
server_args = {
'server_url': server_url,
'session_id': session_id,
}
recorder_args = {
'board': board,
'access_key': args.access_key,
'keywords': args.keywords,
'keyword_paths': args.keyword_paths,
'library_path': args.library_path,
'model_path': args.model_path,
'sensitivities': args.sensitivities,
'hd_trigger': args.hd_trigger,
'keyboard_key': args.keyboard_key,
'press_type': args.press_type,
'voice_trigger': args.voice_trigger,
'CHUNK': args.IN_CHUNK,
'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.IN_CHANNELS,
'RATE': args.IN_RATE,
'max_slience_time': args.max_slience_time,
'min_stream_record_time': args.min_stream_record_time,
'input_device_index': args.mircophone_device,
'frames_per_buffer': args.IN_frames_per_buffer,
}
player_args = {
'output_device_index': args.speaker_device,
'CHUNK': args.OUT_CHUNK,
'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.OUT_CHANNELS,
'RATE': args.OUT_RATE,
'filename': args.OUT_filename,
'frames_per_buffer': args.OUT_frames_per_buffer,
}
log_args = {
'log_file': args.log_file,
'log_level': args.log_level,
}
localclient = WebSocketClinet(
board=board,
server_args=server_args,
recorder_args=recorder_args,
player_args=player_args,
excute_args=excute_args,
log_args=log_args)
localclient.process_init()
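# Example invocation (flag values are illustrative; the defaults above are used when omitted):
#   python ws_client.py --hd_trigger keyboard --press_type long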

181
ws_client_long.py Normal file
View File

@ -0,0 +1,181 @@
from takway.clients.web_socket_client_utils import WebSocketClinet
import pvporcupine
import pyaudio
import platform
if __name__ == '__main__':
# server_url = 'ws://121.41.224.27:8000/chat'
# server_url = 'ws://39.107.254.69:33089/chat'
# server_url = 'wss://takway-ai.kingtous.cn/chat/streaming'
# server_url = 'ws://114.214.236.207:7878/chat/streaming'
server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary'
# session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
# session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # duck
# session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # sleep v2
session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # new
excute_args = {}
# excute_args = {'enable': True}
ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='
system = platform.system()
if system == 'Windows':
print("WebSocketClinet runs on Windows system.")
board = None
elif system == 'Linux':
# board = 'v329'
board = 'orangepi'
ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw=='
mircophone_device = None
speaker_device = None
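# Per-board defaults: wake-word model, hardware trigger and audio device indices are chosen below based on the detected board.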
if board == 'v329':
import gpiod as gpio
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
keywords = ['可莉可莉']
keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"]
model_path = r"picovoice_models/porcupine_params_zh.pv"
hd_trigger = 'button'
player = 'maixsense'
elif board == 'orangepi':
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
hd_trigger = 'button'
mircophone_device = 2
speaker_device = 2
else:
keywords = ['hey google', 'ok google']
keyword_paths = None
model_path = None
hd_trigger = 'keyboard'
player = 'opencv'
import argparse
parser = argparse.ArgumentParser()
# server params
# recorder parameters
parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
parser.add_argument('--access_key',default=ACCESS_KEY,
help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
parser.add_argument('--keywords',nargs='+',choices=sorted(pvporcupine.KEYWORDS),type=list,
default=keywords,
help='List of default keywords for detection. Available keywords: %s' % ', '.join(
'%s' % w for w in sorted(pvporcupine.KEYWORDS)),metavar='')
parser.add_argument('--keyword_paths',nargs='+',
default=keyword_paths,
help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
parser.add_argument('--library_path',default=None,
help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
parser.add_argument('--model_path',
default=model_path,
help='Absolute path to the file containing model parameters. '
'Default: using the library provided by `pvporcupine`')
parser.add_argument('--sensitivities',type=float,
default=0.9,
help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
"sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 "
"will be used.")
parser.add_argument('--hd_trigger', type=str,
# default='keyboard',
default=hd_trigger,
help='Hardware trigger')
parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')
# recorder parameters
parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size') # original value
parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate')
parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format')
parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels')
parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name')
parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
parser.add_argument('--min_stream_record_time', type=float, default=0.8, help='Min stream record time, sec')
parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index')
# player parameters
parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index')
parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size')
parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate')
parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format')
parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels')
parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name')
parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer')
# log parameters
parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file')
parser.add_argument('--log_level', type=str, default='INFO', help='Log level')
parser.add_argument('--debug', type=bool, default=False, help='Debug mode')
args = parser.parse_args()
# sort out args and params
server_args = {
'server_url': server_url,
'session_id': session_id,
}
recorder_args = {
'board': board,
'access_key': args.access_key,
'keywords': args.keywords,
'keyword_paths': args.keyword_paths,
'library_path': args.library_path,
'model_path': args.model_path,
'sensitivities': args.sensitivities,
'hd_trigger': args.hd_trigger,
'keyboard_key': args.keyboard_key,
'voice_trigger': args.voice_trigger,
'CHUNK': args.IN_CHUNK,
'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.IN_CHANNELS,
'RATE': args.IN_RATE,
'min_stream_record_time': args.min_stream_record_time,
'input_device_index': args.mircophone_device,
'frames_per_buffer': args.IN_frames_per_buffer,
}
player_args = {
'output_device_index': args.speaker_device,
'CHUNK': args.OUT_CHUNK,
'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32,
'CHANNELS': args.OUT_CHANNELS,
'RATE': args.OUT_RATE,
'filename': args.OUT_filename,
'frames_per_buffer': args.OUT_frames_per_buffer,
}
log_args = {
'log_file': args.log_file,
'log_level': args.log_level,
}
localclient = WebSocketClinet(
board=board,
server_args=server_args,
recorder_args=recorder_args,
player_args=player_args,
excute_args=excute_args,
log_args=log_args)
localclient.process_init()
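# Example invocation (flag values are illustrative; the defaults above are used when omitted):
#   python ws_client_long.py --hd_trigger keyboard --min_stream_record_time 0.8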