commit 8255cc8c68 ("update")

@@ -0,0 +1,401 @@
## Ignore Visual Studio temporary files, build results, and
## files generated by popular Visual Studio add-ons.
##
## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore

# User-specific files
*.rsuser
*.suo
*.user
*.userosscache
*.sln.docstates

# User-specific files (MonoDevelop/Xamarin Studio)
*.userprefs

# Mono auto generated files
mono_crash.*

# Build results
[Dd]ebug/
[Dd]ebugPublic/
[Rr]elease/
[Rr]eleases/
x64/
x86/
[Ww][Ii][Nn]32/
[Aa][Rr][Mm]/
[Aa][Rr][Mm]64/
bld/
[Bb]in/
[Oo]bj/
[Oo]ut/
[Ll]og/
[Ll]ogs/

# Visual Studio 2015/2017 cache/options directory
.vs/
# Uncomment if you have tasks that create the project's static files in wwwroot
#wwwroot/

# Visual Studio 2017 auto generated files
Generated\ Files/

# MSTest test Results
[Tt]est[Rr]esult*/
[Bb]uild[Ll]og.*

# NUnit
*.VisualState.xml
TestResult.xml
nunit-*.xml

# Build Results of an ATL Project
[Dd]ebugPS/
[Rr]eleasePS/
dlldata.c

# Benchmark Results
BenchmarkDotNet.Artifacts/

# .NET Core
project.lock.json
project.fragment.lock.json
artifacts/

# ASP.NET Scaffolding
ScaffoldingReadMe.txt

# StyleCop
StyleCopReport.xml

# Files built by Visual Studio
*_i.c
*_p.c
*_h.h
*.ilk
*.meta
*.obj
*.iobj
*.pch
*.pdb
*.ipdb
*.pgc
*.pgd
*.rsp
*.sbr
*.tlb
*.tli
*.tlh
*.tmp
*.tmp_proj
*_wpftmp.csproj
*.log
*.vspscc
*.vssscc
.builds
*.pidb
*.svclog
*.scc

# Chutzpah Test files
_Chutzpah*

# Visual C++ cache files
ipch/
*.aps
*.ncb
*.opendb
*.opensdf
*.sdf
*.cachefile
*.VC.db
*.VC.VC.opendb

# Visual Studio profiler
*.psess
*.vsp
*.vspx
*.sap

# Visual Studio Trace Files
*.e2e

# TFS 2012 Local Workspace
$tf/

# Guidance Automation Toolkit
*.gpState

# ReSharper is a .NET coding add-in
_ReSharper*/
*.[Rr]e[Ss]harper
*.DotSettings.user

# TeamCity is a build add-in
_TeamCity*

# DotCover is a Code Coverage Tool
*.dotCover

# AxoCover is a Code Coverage Tool
.axoCover/*
!.axoCover/settings.json

# Coverlet is a free, cross platform Code Coverage Tool
coverage*.json
coverage*.xml
coverage*.info

# Visual Studio code coverage results
*.coverage
*.coveragexml

# NCrunch
_NCrunch_*
.*crunch*.local.xml
nCrunchTemp_*

# MightyMoose
*.mm.*
AutoTest.Net/

# Web workbench (sass)
.sass-cache/

# Installshield output folder
[Ee]xpress/

# DocProject is a documentation generator add-in
DocProject/buildhelp/
DocProject/Help/*.HxT
DocProject/Help/*.HxC
DocProject/Help/*.hhc
DocProject/Help/*.hhk
DocProject/Help/*.hhp
DocProject/Help/Html2
DocProject/Help/html

# Click-Once directory
publish/

# Publish Web Output
*.[Pp]ublish.xml
*.azurePubxml
# Note: Comment the next line if you want to checkin your web deploy settings,
# but database connection strings (with potential passwords) will be unencrypted
*.pubxml
*.publishproj

# Microsoft Azure Web App publish settings. Comment the next line if you want to
# checkin your Azure Web App publish settings, but sensitive information contained
# in these scripts will be unencrypted
PublishScripts/

# NuGet Packages
*.nupkg
# NuGet Symbol Packages
*.snupkg
# The packages folder can be ignored because of Package Restore
**/[Pp]ackages/*
# except build/, which is used as an MSBuild target.
!**/[Pp]ackages/build/
# Uncomment if necessary however generally it will be regenerated when needed
#!**/[Pp]ackages/repositories.config
# NuGet v3's project.json files produces more ignorable files
*.nuget.props
*.nuget.targets

# Microsoft Azure Build Output
csx/
*.build.csdef

# Microsoft Azure Emulator
ecf/
rcf/

# Windows Store app package directories and files
AppPackages/
BundleArtifacts/
Package.StoreAssociation.xml
_pkginfo.txt
*.appx
*.appxbundle
*.appxupload

# Visual Studio cache files
# files ending in .cache can be ignored
*.[Cc]ache
# but keep track of directories ending in .cache
!?*.[Cc]ache/

# Others
ClientBin/
~$*
*~
*.dbmdl
*.dbproj.schemaview
*.jfm
*.pfx
*.publishsettings
orleans.codegen.cs

# Including strong name files can present a security risk
# (https://github.com/github/gitignore/pull/2483#issue-259490424)
#*.snk

# Since there are multiple workflows, uncomment next line to ignore bower_components
# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622)
#bower_components/

# RIA/Silverlight projects
Generated_Code/

# Backup & report files from converting an old project file
# to a newer Visual Studio version. Backup files are not needed,
# because we have git ;-)
_UpgradeReport_Files/
Backup*/
UpgradeLog*.XML
UpgradeLog*.htm
ServiceFabricBackup/
*.rptproj.bak

# SQL Server files
*.mdf
*.ldf
*.ndf

# Business Intelligence projects
*.rdl.data
*.bim.layout
*.bim_*.settings
*.rptproj.rsuser
*- [Bb]ackup.rdl
*- [Bb]ackup ([0-9]).rdl
*- [Bb]ackup ([0-9][0-9]).rdl

# Microsoft Fakes
FakesAssemblies/

# GhostDoc plugin setting file
*.GhostDoc.xml

# Node.js Tools for Visual Studio
.ntvs_analysis.dat
node_modules/

# Visual Studio 6 build log
*.plg

# Visual Studio 6 workspace options file
*.opt

# Visual Studio 6 auto-generated workspace file (contains which files were open etc.)
*.vbw

# Visual Studio LightSwitch build output
**/*.HTMLClient/GeneratedArtifacts
**/*.DesktopClient/GeneratedArtifacts
**/*.DesktopClient/ModelManifest.xml
**/*.Server/GeneratedArtifacts
**/*.Server/ModelManifest.xml
_Pvt_Extensions

# Paket dependency manager
.paket/paket.exe
paket-files/

# FAKE - F# Make
.fake/

# CodeRush personal settings
.cr/personal

# Python Tools for Visual Studio (PTVS)
__pycache__/


# Cake - Uncomment if you are using it
# tools/**
# !tools/packages.config

# Tabs Studio
*.tss

# Telerik's JustMock configuration file
*.jmconfig

# BizTalk build output
*.btp.cs
*.btm.cs
*.odx.cs
*.xsd.cs

# OpenCover UI analysis results
OpenCover/

# Azure Stream Analytics local run output
ASALocalRun/

# MSBuild Binary and Structured Log
*.binlog

# NVidia Nsight GPU debugger configuration file
*.nvuser

# MFractors (Xamarin productivity tool) working folder
.mfractor/

# Local History for Visual Studio
.localhistory/

# BeatPulse healthcheck temp database
healthchecksdb

# Backup folder for Package Reference Convert tool in Visual Studio 2017
MigrationBackup/

# Ionide (cross platform F# VS Code tools) working folder
.ionide/

# Fody - auto-generated XML schema
FodyWeavers.xsd

# build
build
monotonic_align/core.c
*.o
*.so
*.dll

# data
/config.json
/*.pth
*.wav
*.mp3
/resources
/MoeGoe.spec
/dist/MoeGoe
/dist
Gao/
.idea


# custom
vits_model
model/*.pth
vosk-model-*
whisper*
temp
*.whl
*demo*
Emoji/
api-for-open-llm/models
datasets/
ChatHaruhi-Expand-118K/

!examples/example_recording.wav

music/
story

@@ -0,0 +1,48 @@

## Client (`Client`) Front End

### Hardware Interaction Front-End Service

#### Introduction

The hardware interaction front-end service is a `WebSocket`-based service used to communicate with hardware devices.

### Environment Setup

#### (1) Install dependencies:

```
sudo apt-get update
sudo apt-get upgrade
sudo apt-get install cmake g++ gcc
sudo apt-get install python3-pip python3-dev portaudio19-dev libsndfile1
```

#### (2) Clone the project and install it locally:

```
# clone the project (HTTPS or SSH)
git clone https://github.com/Irvingao/takway_base.git
# or: git clone git@github.com:Irvingao/takway_base.git
cd takway_base
pip install -v -e .
```

#### (3) Install the on-board environment dependencies:

```
pip install -r requirements/board_requirements.txt
```

#### (4) Download the required model files:

- [Picovoice](https://picovoice.ai/docs/quick-start/porcupine-python/): on-device keyword wake-up model

  The keyword model currently in use is `可莉可莉_zh_raspberry-pi_v3_0_0`; you can train and substitute your own keyword model on the Picovoice site.

#### (5) Start the service:

```
python ws_client.py
```
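
For reference, a minimal sketch of what such a WebSocket client does is shown below (the server URL and payload fields here are illustrative assumptions; see `takway/clients/client_utils.py` for the project's implementation):

```
import json
from websocket import create_connection  # websocket-client package

ws = create_connection("ws://localhost:8000/chat")  # placeholder URL
ws.send(json.dumps({"text": "hello", "audio": "", "meta_info": {"is_end": True}}))
print(ws.recv())
ws.close()
```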

@@ -0,0 +1,24 @@

#!/bin/bash

# Add an auto-start entry for the Python client to .bashrc:
# echo "nohup python3 /home/orangepi/takway_base/ws_client.py& " >> ~/.bashrc

# update system; the board's default password ("orangepi") is fed to sudo via the heredoc
sudo -S apt-get update -y << EOF
orangepi
EOF

# install dependencies
sudo apt-get install -y cmake g++ gcc
sudo apt-get install -y python3-pip python3-dev portaudio19-dev libsndfile1

# install python dependencies
# git clone https://Irvingao:<token>@github.com/Irvingao/takway_base.git
git clone http://43.132.157.186:3000/gaohz/TakwayBoard.git
cd TakwayBoard
# git remote set-url origin https://<token>@github.com/Irvingao/takway_base.git
pip install -v -e .

pip install -r requirements.txt

reboot

@@ -0,0 +1,8 @@

numpy
Unidecode
pyaudio
keyboard
websocket-client
pvporcupine
requests
webrtcvad

@@ -0,0 +1,15 @@

# setup.py

from setuptools import setup, find_packages

setup(
    name='takway',
    version='1.0',
    packages=find_packages(),  # automatically discover packages and sub-packages
    url='https://github.com/Irvingao/takway_base',
    classifiers=[
        'Programming Language :: Python :: 3',
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
    ],
)

@@ -0,0 +1,13 @@

Metadata-Version: 1.1
Name: takway
Version: 1.0
Summary: UNKNOWN
Home-page: https://github.com/Irvingao/takway_base
Author: UNKNOWN
Author-email: UNKNOWN
License: UNKNOWN
Description: UNKNOWN
Platform: UNKNOWN
Classifier: Programming Language :: Python :: 3
Classifier: License :: OSI Approved :: MIT License
Classifier: Operating System :: OS Independent

@@ -0,0 +1,21 @@

README.md
setup.py
takway/__init__.py
takway/audio_utils.py
takway/cam_utils.py
takway/common_utils.py
takway/emo_utils.py
takway/picovoice_utils.py
takway.egg-info/PKG-INFO
takway.egg-info/SOURCES.txt
takway.egg-info/dependency_links.txt
takway.egg-info/top_level.txt
takway/board/__init__.py
takway/board/base_hd.py
takway/board/keyboard.py
takway/board/orangepi.py
takway/board/sipeed.py
takway/clients/__init__.py
takway/clients/client_utils.py
takway/clients/local_client_utils.py
takway/clients/web_socket_client_utils.py

@@ -0,0 +1 @@


@@ -0,0 +1 @@

takway

@@ -0,0 +1,133 @@

import multiprocessing
import queue
from typing import Iterator
import time
import json

class QueueIterator:
    def __init__(self,
                 queue,
                 timeout: int = 10):
        self.queue = queue
        self.timeout = timeout

    def __iter__(self) -> Iterator:
        return self

    def __next__(self):
        try:
            data = self.queue.get(block=True, timeout=self.timeout)
            if data is None:  # None is used as the end-of-stream signal
                raise StopIteration
            else:
                return data
        except queue.Empty:
            raise StopIteration


def producer(queue: multiprocessing.Queue):
    for i in range(5):  # produce 5 items
        data = {'data': i, 'is_end': False}
        queue.put(data)
        time.sleep(1)
    queue.put(None)  # send the end-of-stream signal

def get_stream_data_from_queue(queue: multiprocessing.Queue):
    for data in QueueIterator(queue):
        print(data)
        yield json.dumps({'line': data, 'is_end': False})

if __name__ == "__main__":
    queue = multiprocessing.Queue()
    # create and start the producer process
    p = multiprocessing.Process(target=producer, args=(queue,))
    p.start()

    # consume the queue through the iterator
    for data in QueueIterator(queue):
        print(data)

    # wait for the producer process to finish
    p.join()


'''
# request body
{
    "AUTH_INFO": {
        "user": "",       # string
        "authid": "",     # string
        "api_keys": "",   # string
        "timestamp": "",  # string
    },
    "DATA": {
        "Audio": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "rate": ,      # int
                "channels": ,  # int
                "format": ,    # int
            }
        },
        "Text": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                # TODO
            }
        },
        "Image": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "width": ,     # int
                "height": ,    # int
                "format": ,    # string
            }
        }
    },
    "META_INFO": {
        "model_type": "",     # string
        "model_version": "",  # string
        "model_url": "",      # string
        "vits": {
            "speaker_id": ,   # int
        }
    }
}

# response body
{
    "RESPONSE_INFO": {
        "status": "success/error",  # string
        "message": "xxxxx",         # string
    },
    "DATA": {
        "Audio": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "rate": ,      # int
                "channels": ,  # int
                "format": ,    # int
            }
        },
        "Text": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "is_end": True/False,  # bool
            }
        },
        "Image": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "width": ,     # int
                "height": ,    # int
                "format": ,    # string
            }
        }
    }
}
'''
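
# A sketch of the intended pairing (not part of the original file):
# get_stream_data_from_queue() yields JSON strings one item at a time, which
# maps naturally onto a streaming HTTP response. A minimal Flask pairing might
# look like this (Flask itself is an assumption; the project does not declare it):
#
#   from flask import Flask, Response
#   app = Flask(__name__)
#
#   @app.route('/stream')
#   def stream():
#       q = multiprocessing.Queue()
#       multiprocessing.Process(target=producer, args=(q,)).start()
#       return Response(get_stream_data_from_queue(q), mimetype='application/json')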

@@ -0,0 +1,578 @@

import io
import os
import time
import pyaudio
import wave
import json
import warnings
import threading
import numpy as np
from collections import deque

from .common_utils import encode_bytes2str, decode_str2bytes

from takway.board import *
try:
    import keyboard
except ImportError:
    pass

def play_audio(audio_data, type='base64'):
    '''
    Play a decoded base64 audio stream.
    '''
    # PyAudio configuration
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=22050, output=True)

    # play the audio
    stream.write(audio_data)
    stream.stop_stream()
    stream.close()
    p.terminate()

'''
import librosa
def reshape_sample_rate(audio, sr_original=None, sr_target=16000):
    # get the original sample rate and audio data
    if isinstance(audio, tuple):
        sr_original, audio_data = audio
    elif isinstance(audio, bytes):
        audio_data = np.frombuffer(audio, dtype=np.int16)
        assert sr_original is not None, f"sr_original should be provided if audio is a \
            numpy.ndarray, but got sr_original `{sr_original}`."

    if isinstance(audio_data, np.ndarray):
        if audio_data.dtype == np.dtype('int16'):
            audio_data = audio_data.astype(np.float32) / np.iinfo(np.int16).max
        assert audio_data.dtype == np.dtype('float32'), f"audio_data should be float32, \
            but got {audio_data.dtype}."
    else:
        raise TypeError(f"audio_data should be numpy.ndarray, but got {type(audio_data)}.")

    # resample the audio data
    audio_data_resampled = librosa.resample(audio_data, orig_sr=sr_original, target_sr=sr_target)

    if audio_data_resampled.dtype == np.dtype('float32'):
        audio_data_resampled = np.int16(audio_data_resampled * np.iinfo(np.int16).max)

    # If the input was bytes, return the resampled data as bytes
    if isinstance(audio, bytes):
        audio_data_resampled = audio_data_resampled.tobytes()

    return audio_data_resampled

# Example usage:
# If your audio data is in bytes:
# audio_bytes = b'...' # Your audio data as bytes
# audio_data_resampled = reshape_sample_rate(audio_bytes)

# If your audio data is in numpy int16:
# audio_int16 = np.array([...], dtype=np.int16) # Your audio data as numpy int16
# audio_data_resampled = reshape_sample_rate(audio_int16)
'''


# ####################################################### #
# base audio class
# ####################################################### #

class BaseAudio:
    def __init__(self,
                 filename=None,
                 input=False,
                 output=False,
                 CHUNK=1024,
                 FORMAT=pyaudio.paInt16,
                 CHANNELS=1,
                 RATE=16000,
                 input_device_index=None,
                 output_device_index=None,
                 **kwargs):
        self.CHUNK = CHUNK
        self.FORMAT = FORMAT
        self.CHANNELS = CHANNELS
        self.RATE = RATE
        self.filename = filename
        assert input != output, "input and output cannot be the same, \
            but got input={} and output={}.".format(input, output)
        print("------------------------------------------")
        print(f"{'Input' if input else 'Output'} Audio Initialization: ")
        print(f"CHUNK: {self.CHUNK} \nFORMAT: {self.FORMAT} \nCHANNELS: {self.CHANNELS} \nRATE: {self.RATE} \ninput_device_index: {input_device_index} \noutput_device_index: {output_device_index}")
        print("------------------------------------------")
        self.p = pyaudio.PyAudio()
        self.stream = self.p.open(format=FORMAT,
                                  channels=CHANNELS,
                                  rate=RATE,
                                  input=input,
                                  output=output,
                                  input_device_index=input_device_index,
                                  output_device_index=output_device_index,
                                  **kwargs)

    def load_audio_file(self, wav_file):
        with wave.open(wav_file, 'rb') as wf:
            params = wf.getparams()
            frames = wf.readframes(params.nframes)
            print("Audio file loaded.")
            # Audio parameters:
            # print("Channels:", params.nchannels)
            # print("Sample width:", params.sampwidth)
            # print("Frame rate:", params.framerate)
            # print("Number of frames:", params.nframes)
            # print("Compression type:", params.comptype)
            return frames

    def check_audio_type(self, audio_data, return_type=None):
        assert return_type in ['bytes', 'io', None], \
            "return_type should be 'bytes', 'io' or None."
        if isinstance(audio_data, str):
            # long strings are treated as base64 audio, short ones as file paths
            if len(audio_data) > 50:
                audio_data = decode_str2bytes(audio_data)
            else:
                assert os.path.isfile(audio_data), \
                    "audio_data should be a file path or a bytes object."
                wf = wave.open(audio_data, 'rb')
                audio_data = wf.readframes(wf.getnframes())
        elif isinstance(audio_data, np.ndarray):
            if audio_data.dtype == np.dtype('float32'):
                audio_data = np.int16(audio_data * np.iinfo(np.int16).max)
            audio_data = audio_data.tobytes()
        elif isinstance(audio_data, bytes):
            pass
        else:
            raise TypeError(f"audio_data must be bytes, numpy.ndarray or str, \
                but got {type(audio_data)}")

        if return_type is None:
            return audio_data
        return self.write_wave(None, [audio_data], return_type)

    def write_wave(self, filename, frames, return_type='io'):
        """Write audio data to a file."""
        if isinstance(frames, bytes):
            frames = [frames]
        if not isinstance(frames, list):
            raise TypeError("frames should be \
                a list of bytes or a bytes object, \
                but got {}.".format(type(frames)))

        if return_type == 'io':
            if filename is None:
                filename = io.BytesIO()
            if self.filename:
                filename = self.filename
            return self.write_wave_io(filename, frames)
        elif return_type == 'bytes':
            return self.write_wave_bytes(frames)

    def write_wave_io(self, filename, frames):
        """
        Write audio data to a file-like object.

        Args:
            filename: [string or file-like object], file path or file-like object to write
            frames: list of bytes, audio data to write
        """
        wf = wave.open(filename, 'wb')

        # set the WAV file parameters
        wf.setnchannels(self.CHANNELS)
        wf.setsampwidth(self.p.get_sample_size(self.FORMAT))
        wf.setframerate(self.RATE)
        wf.writeframes(b''.join(frames))
        wf.close()
        if isinstance(filename, io.BytesIO):
            filename.seek(0)  # reset the file pointer to the beginning
        return filename

    def write_wave_bytes(self, frames):
        """Write audio data to a bytes object."""
        return b''.join(frames)


# ####################################################### #
# play audio data from Speaker
# ####################################################### #

class AudioPlayer(BaseAudio):
    def __init__(self,
                 RATE=22050,
                 **kwargs):
        super().__init__(output=True, RATE=RATE, **kwargs)

    def play(self, audio_data):
        audio_data = self.check_audio_type(audio_data, return_type=None)

        # this loop already covers the whole buffer, chunk by chunk
        for i in range(0, len(audio_data), self.CHUNK):
            self.stream.write(audio_data[i:i+self.CHUNK])

    def close(self):
        self.stream.stop_stream()
        self.stream.close()
        self.p.terminate()
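
# A minimal usage sketch (not part of the original API; assumes a working
# default output device). check_audio_type() converts a float32 ndarray to
# int16 PCM bytes before playback:
#
#   player = AudioPlayer(RATE=22050)
#   t = np.linspace(0, 1, 22050, endpoint=False)
#   tone = (0.3 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)
#   player.play(tone)
#   player.close()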


# ####################################################### #
# record audio data from microphone
# ####################################################### #
class BaseRecorder(BaseAudio):
    def __init__(self,
                 input=True,
                 base_chunk_size=None,
                 RATE=16000,
                 **kwargs):
        super().__init__(input=input, RATE=RATE, **kwargs)
        self.base_chunk_size = base_chunk_size
        if base_chunk_size is None:
            self.base_chunk_size = self.CHUNK

    def record(self,
               filename,
               duration=5,
               return_type='io',
               logger=None):
        if logger is not None:
            logger.info("Recording started.")
        else:
            print("Recording started.")
        frames = []
        for i in range(0, int(self.RATE / self.CHUNK * duration)):
            data = self.stream.read(self.CHUNK, exception_on_overflow=False)
            frames.append(data)
        if logger is not None:
            logger.info("Recording stopped.")
        else:
            print("Recording stopped.")
        return self.write_wave(filename, frames, return_type)

    def record_chunk_voice(self,
                           return_type='bytes',
                           CHUNK=None,
                           exception_on_overflow=True,
                           queue=None):
        data = self.stream.read(self.CHUNK if CHUNK is None else CHUNK,
                                exception_on_overflow=exception_on_overflow)
        if return_type is not None:
            return self.write_wave(None, [data], return_type)
        return data
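
# A minimal usage sketch (not part of the original API; assumes a default
# input device):
#
#   recorder = BaseRecorder(input=True, RATE=16000)
#   wav_io = recorder.record(None, duration=3, return_type='io')  # io.BytesIO with a WAV file
#   pcm = recorder.record_chunk_voice(return_type='bytes')        # one raw CHUNK of PCM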


class HDRecorder(BaseRecorder):
    def __init__(self,
                 board=None,
                 hd_trigger='keyboard',
                 keyboard_key='space',
                 voice_trigger=True,
                 hd_chunk_size=None,
                 hd_detect_threshold=50,
                 **kwargs):
        super().__init__(**kwargs)
        assert hd_trigger in ['keyboard', 'button']

        self.hd_trigger = hd_trigger
        self.voice_trigger = voice_trigger

        self.hd_chunk_size = hd_chunk_size
        if hd_chunk_size is None:
            self.hd_chunk_size = self.base_chunk_size

        if board is None:
            assert hd_trigger == 'keyboard', "board should be `None` if hd_trigger is `keyboard`."
            self.keyboard_key = keyboard_key
            self.hardware = Keyboard(hd_trigger, keyboard_key, hd_detect_threshold)
        else:
            assert hd_trigger == 'button', f"hd_trigger should be `button` if board is `v329` or `orangepi`, but got `{hd_trigger}`."
            if board == 'v329':
                self.hardware = V329(hd_trigger, hd_detect_threshold)
            elif board == 'orangepi':
                self.hardware = OrangePi(hd_trigger, hd_detect_threshold)
        print(f"Using {hd_trigger} as hardware trigger.")

    def wait_for_hardware_pressed(self):
        return self.hardware.wait_for_hardware_pressed()

    @property
    def is_hardware_pressed(self):
        return self.hardware.is_hardware_pressed

    def record_hardware(self, return_type='bytes'):
        """Record audio while the hardware trigger is held down."""
        print("Recording started for hardware trigger.")
        frames = []
        self.wait_for_hardware_pressed()
        while True:
            if self.hd_trigger == 'keyboard':
                if keyboard.is_pressed(self.keyboard_key):
                    print("recording...")
                    data = self.record_chunk_voice(
                        CHUNK=self.CHUNK,
                        return_type=None,
                        exception_on_overflow=False)
                    frames.append(data)
                else:
                    break
            elif self.hd_trigger == 'button':
                if self.is_hardware_pressed:
                    data = self.stream.read(self.CHUNK)
                    frames.append(data)
                else:
                    break
            else:
                raise ValueError("hd_trigger should be 'keyboard' or 'button'.")
        print("Recording stopped.")
        return self.write_wave(self.filename, frames, return_type)

    '''
    def record(self, return_type='bytes', queue=None):
        if self.hd_trigger == 'all':
            value_list = []  # used to track the button state
            if keyboard.is_pressed(self.keyboard_key):
                audio_data = self.record_keyboard(return_type, queue)
            elif self.button.get_value() == 0:
                if self.get_button_status():
                    audio_data = self.record_button(return_type, queue)
            else:
                audio_data = self.record_voice(return_type, queue)
        elif self.hd_trigger == 'keyboard':
            print("Press SPACE to start recording.")
            keyboard.wait("space")
            audio_data = self.record_keyboard(return_type, queue)
        elif self.hd_trigger == 'button':
            print("Touch to start recording...")
            if self.button.get_value() == 0:
                if self.get_button_status():
                    audio_data = self.record_button(return_type, queue)
        else:
            audio_data = self.record_voice(return_type, queue)

        return audio_data

    def record_keyboard(self, return_type='bytes', queue=None):
        """record audio while a key is pressed"""
        print("Recording started.")
        frames = []
        recording = True
        while recording:
            if keyboard.is_pressed(self.keyboard_key):
                data = self.stream.read(self.CHUNK)
                frames.append(data)
            else:
                recording = False
                print("Recording stopped.")
        return self.write_wave(self.filename, frames, return_type)

    def record_button(self, return_type='bytes', queue=None):
        """record audio while the button is pressed"""
        print("Recording started.")
        frames = []
        recording = True
        while recording:
            value = self.button.get_value()
            if value == 0:
                data = self.stream.read(CHUNK)
                frames.append(data)
            else:
                recording = False
                print("Recording stopped.")
        return self.write_wave(self.filename, frames, return_type)
    '''

# ####################################################### #
# record audio data from microphone with VAD
# ####################################################### #
try:
    import webrtcvad
    webrtcvad_available = True
except ImportError:
    warnings.warn("webrtcvad module not found, please install it if you use the `vad` hd_trigger.")
    webrtcvad_available = False

class VADRecorder(HDRecorder):
    def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1, **kwargs):
        super().__init__(**kwargs)
        if webrtcvad_available:
            self.vad = webrtcvad.Vad(vad_sensitivity)
        self.vad_buffer_size = vad_buffer_size
        self.vad_chunk_size = int(self.RATE * frame_duration / 1000)

        self.min_act_time = min_act_time  # minimum active time, in seconds

        self.is_currently_speaking = False
        self.frames = []

    def is_speech(self, data):
        return self.vad.is_speech(data, self.RATE)

    def vad_filter(self, data):
        pass

    def vad_record(self, return_type='io', CHUNK=None, queue=None, save_file=False):
        """Record while running voice activity detection and return the detected speech segment."""
        all_frames = []

        buffer_size = self.vad_buffer_size
        active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size)
        audio_buffer = deque(maxlen=buffer_size)
        silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size)

        print("VAD recording started. Press Ctrl+C to stop.")
        try:
            while True:
                data = self.stream.read(self.vad_chunk_size)
                all_frames.append(data)
                print(f"VAD processing..., is_speech: {self.is_speech(data)}")
                if self.is_speech(data):
                    # status buffers; deque(maxlen=...) discards the oldest entry on append
                    active_buffer.append(True)
                    silence_buffer.append(False)
                    # keep the frame in the look-back buffer
                    audio_buffer.append(data)
                    # enough consecutive speech frames: speech starts
                    if all(active_buffer):
                        if not self.is_currently_speaking:
                            print("Speech start detected")
                            self.is_currently_speaking = True
                            self.frames.extend(audio_buffer)  # include the buffered leading frames
                    if self.is_currently_speaking:
                        self.frames.append(data)
                else:
                    # status buffers
                    active_buffer.append(False)
                    silence_buffer.append(True)
                    # speech was detected earlier and recording is ongoing
                    if self.is_currently_speaking:
                        # enough consecutive silent frames: speech ends
                        if all(silence_buffer):
                            print("Speech end detected")
                            break
        except KeyboardInterrupt:
            print("KeyboardInterrupt")

        finally:
            print("Stopping...")
            if len(all_frames) > 0:
                print(f"ALL frames: {len(all_frames)}")
                print(f"ASR frames: {len(self.frames)}")
            if save_file:
                self.write_wave(f"output_{time.time()}_all.wav", all_frames)
                self.write_wave(f"output_{time.time()}.wav", self.frames)
            return self.write_wave(None, self.frames, return_type='bytes')
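
# Note: webrtcvad only accepts 10/20/30 ms frames at 8/16/32/48 kHz. With
# RATE=16000 and frame_duration=30 the frame is 16000 * 30 / 1000 = 480
# samples, i.e. 960 bytes of 16-bit mono PCM per is_speech() call.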


# ####################################################### #
# record audio data from microphone with Picovoice hot-word detection
# ####################################################### #

import struct
from datetime import datetime
import pvporcupine

class PicovoiceRecorder(VADRecorder):
    def __init__(self,
                 access_key,
                 keywords=None,
                 keyword_paths=None,
                 model_path=None,
                 sensitivities=0.5,
                 library_path=None,
                 **kwargs):

        super().__init__(**kwargs)

        pico_cfg = dict(
            access_key=access_key,
            keywords=keywords,
            keyword_paths=keyword_paths,
            model_path=model_path,
            sensitivities=sensitivities,
            library_path=library_path,
        )

        self.pico_detector_init(pico_cfg)

        self.keywords = self.pico_cfg['keywords']
        print(f"PicovoiceRecorder initialized with keywords: {self.keywords}")

    def pico_detector_init(self, pico_cfg):
        if pico_cfg['keyword_paths'] is None:
            if pico_cfg['keywords'] is None:
                raise ValueError(f"Either `--keywords` or `--keyword_paths` must be set. \
                    Available keywords: {list(pvporcupine.KEYWORDS)}")

            keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in pico_cfg['keywords']]
        else:
            keyword_paths = pico_cfg['keyword_paths']

        if pico_cfg['sensitivities'] is None:
            pico_cfg['sensitivities'] = [0.5] * len(keyword_paths)
        elif isinstance(pico_cfg['sensitivities'], float):
            pico_cfg['sensitivities'] = [pico_cfg['sensitivities']] * len(keyword_paths)

        if len(keyword_paths) != len(pico_cfg['sensitivities']):
            raise ValueError('Number of keywords does not match the number of sensitivities.')

        try:
            self.porcupine = pvporcupine.create(
                access_key=pico_cfg['access_key'],
                keywords=pico_cfg['keywords'],
                keyword_paths=keyword_paths,
                model_path=pico_cfg['model_path'],
                sensitivities=pico_cfg['sensitivities'],
                library_path=pico_cfg['library_path'])
        except pvporcupine.PorcupineInvalidArgumentError as e:
            print("One or more arguments provided to Porcupine is invalid: ", pico_cfg.keys())
            print(e)
            raise e
        except pvporcupine.PorcupineActivationError as e:
            print("AccessKey activation error")
            raise e
        except pvporcupine.PorcupineActivationLimitError as e:
            print("AccessKey '%s' has reached its temporary device limit" % pico_cfg['access_key'])
            raise e
        except pvporcupine.PorcupineActivationRefusedError as e:
            print("AccessKey '%s' refused" % pico_cfg['access_key'])
            raise e
        except pvporcupine.PorcupineActivationThrottledError as e:
            print("AccessKey '%s' has been throttled" % pico_cfg['access_key'])
            raise e
        except pvporcupine.PorcupineError as e:
            print("Failed to initialize Porcupine")
            raise e

        self.pico_cfg = pico_cfg

    def is_wakeup(self, data):
        pcm = struct.unpack_from("h" * self.porcupine.frame_length, data)
        result = self.porcupine.process(pcm)
        if result >= 0:
            print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result]))
            return True
        return False

    def record_picovoice(self, return_type=None, exception_on_overflow=False, queue=None):

        print("Recording started. Press Ctrl+C to stop.")
        while True:
            data = self.record_chunk_voice(
                return_type=None,
                CHUNK=self.porcupine.frame_length,
                exception_on_overflow=exception_on_overflow,
                queue=queue)

            wake_up = self.is_wakeup(data)
            if wake_up:
                break
        return True
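

# A runnable sketch (an illustration, not part of the original file): wait for
# a built-in wake word, then capture one utterance with VAD. The environment
# variable and keyword are assumptions; Porcupine needs an AccessKey from
# https://console.picovoice.ai/, and the default keyboard trigger may require
# root privileges on Linux.
if __name__ == '__main__':
    access_key = os.environ.get('PICOVOICE_ACCESS_KEY', '')
    recorder = PicovoiceRecorder(access_key=access_key,
                                 keywords=['picovoice'],
                                 input=True, RATE=16000, CHANNELS=1)
    recorder.record_picovoice()     # blocks until the wake word is detected
    speech = recorder.vad_record()  # bytes of 16-bit mono PCM speech
    print(f"captured {len(speech)} bytes")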

@@ -0,0 +1,4 @@

from .base_hd import BaseHardware
from .keyboard import Keyboard
from .sipeed import V329
from .orangepi import OrangePi

@@ -0,0 +1,32 @@

import threading
import time

class BaseHardware:
    def __init__(self, hd_trigger=None, hd_detect_threshold=50):
        self.hd_trigger = hd_trigger
        self.hd_detect_threshold = hd_detect_threshold

        self.hd_lock = threading.Lock()
        self.shared_hd_status = False

    def init_hd_thread(self):
        hd_thread = threading.Thread(target=self.hd_detection_loop)
        hd_thread.start()
        print("HD detection thread started.")

    def hd_detection_loop(self):
        pass

    @property
    def is_hardware_pressed(self):
        return self.shared_hd_status

    def wait_for_hardware_pressed(self):
        print("Waiting for hardware trigger.")
        while True:
            if self.is_hardware_pressed:
                break
            time.sleep(0.01)  # poll every 10 ms instead of busy-waiting
        return True
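
# A minimal subclass sketch (an illustration, not part of the original file)
# showing the intended pattern: poll an input in hd_detection_loop() and
# publish it through shared_hd_status. read_my_input() is hypothetical.
#
#   class MyTrigger(BaseHardware):
#       def __init__(self):
#           super().__init__(hd_trigger='button')
#           self.init_hd_thread()
#
#       def hd_detection_loop(self):
#           while True:
#               with self.hd_lock:
#                   self.shared_hd_status = read_my_input()
#               time.sleep(0.01)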

@@ -0,0 +1,36 @@

import keyboard
import time

from takway.board.base_hd import BaseHardware

import datetime

class Keyboard(BaseHardware):
    def __init__(self, hd_trigger='keyboard', keyboard_key='space', hd_detect_threshold=50):
        super().__init__(hd_trigger, hd_detect_threshold)

        self.keyboard_key = keyboard_key
        self.init_hd_thread()
        self.power_status = False  # latched state toggled by a single key press

    def hd_detection_loop(self):
        last_status = False
        while True:
            '''
            keyboard_status = keyboard.is_pressed(self.keyboard_key)
            with self.hd_lock:
                self.shared_hd_status = keyboard_status
            '''
            self.shared_hd_status = keyboard.is_pressed(self.keyboard_key)
            time.sleep(0.001)

            # toggle power_status on key release (falling edge)
            if not self.shared_hd_status and last_status:
                self.power_status = not self.power_status
                if self.power_status:
                    print("Chatting mode.")
                else:
                    print("Silence mode.")
                print(f"press time: {datetime.datetime.now()}")
            last_status = self.shared_hd_status

@@ -0,0 +1,94 @@

from takway.board.base_hd import BaseHardware
import threading
import datetime
try:
    import wiringpi
    from wiringpi import GPIO
except ImportError:
    pass


'''
| GPIO | LED    |
| ---- | ------ |
| 0    | red    |
| 1    | yellow |
| 2    | green  |
| 3    | blue   |
| 4    | white  |

| GPIO | BUTTON   |
| ---- | -------- |
| 6    | button 1 |
| 8    | button 2 |
'''

class OrangePi(BaseHardware):
    def __init__(self, hd_trigger='button', hd_detect_threshold=50):
        super().__init__(hd_trigger, hd_detect_threshold)

        self.LED_PIN_red = 0
        self.LED_PIN_yellow = 1
        self.LED_PIN_green = 2
        self.LED_PIN_blue = 3
        self.LED_PIN_white = 4

        self.BUTTON_PIN_1 = 6
        self.BUTTON_PIN_2 = 8

        self.button_status_2 = False
        self.led_set_status_2 = False

        self.power_status = False  # latched state toggled by a single button press

        self.button_init()
        self.init_hd_thread()

    def button_init(self):
        wiringpi.wiringPiSetup()
        # GPIO output mode
        wiringpi.pinMode(self.LED_PIN_red, GPIO.OUTPUT)
        wiringpi.pinMode(self.LED_PIN_yellow, GPIO.OUTPUT)
        wiringpi.pinMode(self.LED_PIN_green, GPIO.OUTPUT)
        wiringpi.pinMode(self.LED_PIN_blue, GPIO.OUTPUT)
        wiringpi.pinMode(self.LED_PIN_white, GPIO.OUTPUT)
        # GPIO input mode
        wiringpi.pinMode(self.BUTTON_PIN_1, GPIO.INPUT)
        wiringpi.pinMode(self.BUTTON_PIN_2, GPIO.INPUT)

    def init_hd_thread(self):
        hd_threads = [threading.Thread(target=self.hd_detection_loop)]
        for hd_thread in hd_threads:
            hd_thread.start()

    def hd_detection_loop(self):
        last_status = False
        while True:
            self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1)
            if self.button_status:
                wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW)
            else:
                wiringpi.digitalWrite(self.LED_PIN_red, GPIO.HIGH)

            # publish the button state (assumed active-low, matching the LED
            # logic above) so that is_hardware_pressed reflects it
            with self.hd_lock:
                self.shared_hd_status = not self.button_status

            # toggle power_status when the button state changes from 1 to 0
            if not self.button_status and last_status:
                self.power_status = not self.power_status
                if self.power_status:
                    print("Chatting mode.")
                else:
                    print("Silence mode.")
                print(f"press time: {datetime.datetime.now()}")
            last_status = self.button_status

    def set_led_on(self, color='red'):
        wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH)

    def set_led_off(self, color='red'):
        wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.LOW)

if __name__ == '__main__':
    orangepi = OrangePi()
    while True:
        pass
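
# For illustration (assumes the wiring in the table above): an LED can be
# driven directly once the board object exists, e.g.
#
#   import time
#   pi = OrangePi()
#   pi.set_led_on('green'); time.sleep(0.5); pi.set_led_off('green')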

@@ -0,0 +1,58 @@

import warnings
import threading
import time
from collections import deque

from takway.board.base_hd import BaseHardware

try:
    # sipeed MaixSense V329
    import gpiod as gpio
except ImportError:
    # gpiod is unavailable on this machine; V329 cannot be used
    pass

class V329(BaseHardware):
    def __init__(self, hd_trigger='button', hd_detect_threshold=50):
        super().__init__(hd_trigger, hd_detect_threshold)
        self.button = self.button_init()

        self.init_hd_thread()

    def button_init(self):
        PH_BASE = (8 - 1) * 32  # PH

        gpiochip1 = gpio.chip("gpiochip1")
        button = gpiochip1.get_line(PH_BASE + 5)
        config = gpio.line_request()
        config.request_type = gpio.line_request.DIRECTION_INPUT
        config.flags = gpio.line_request.FLAG_BIAS_PULL_UP
        button.request(config)
        return button

    @property
    def button_status(self):
        return self.button.get_value() == 1

    def hd_detection_loop(self):
        self.shared_hd_status = False
        button_value_list = deque(maxlen=self.hd_detect_threshold)

        while True:
            # deque(maxlen=...) discards the oldest sample automatically
            button_value_list.append(self.button_status)
            # if the last N samples are all True, treat the button as pressed
            if button_value_list.count(True) == button_value_list.maxlen:
                with self.hd_lock:
                    self.shared_hd_status = True
            # if the last N samples are all False, treat the button as released
            if button_value_list.count(False) == button_value_list.maxlen:
                with self.hd_lock:
                    self.shared_hd_status = False

@@ -0,0 +1,11 @@

import warnings

try:
    import cv2
except ImportError:
    warnings.warn("OpenCV is not installed, please check the module if you need it.")

class Camera:
    def __init__(self,
                 device='pc',
                 width=1280,
                 height=720):
        pass

@@ -0,0 +1,160 @@

import os
import json
import time
import datetime
import requests

from takway.common_utils import encode_bytes2str, decode_str2bytes

'''
{
    "RESPONSE_INFO": {
        "status": "success/error",  # string
        "message": "xxxxx",         # string
    },
    "DATA": {
        "Audio": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "rate": ,      # int
                "channels": ,  # int
                "format": ,    # int
            }
        },
        "Text": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "is_end": True/False,  # bool
            }
        },
        "Image": {
            "data": "xxxxx",   # base64 encoded data
            "metadata": {
                "width": ,     # int
                "height": ,    # int
                "format": ,    # string
            }
        }
    }
}
'''

class Client:
    def __init__(self, server_url):
        self.server_url = server_url

    def gen_request_data(self, **kwargs):
        audio_data = kwargs.get("audio_data", None)
        text_data = kwargs.get("text_data", dict())
        return json.dumps(
            {
                "is_end": audio_data.get("is_end"),  # bool
                "is_bgn": audio_data.get("is_bgn"),  # bool
                "DATA": {
                    "Audio": {
                        "data": encode_bytes2str(audio_data['frames']),  # base64 encoded data
                        "metadata": {
                            "frames_size": audio_data.get("frames_size"),  # string
                            "chunk_size": audio_data.get("chunk_size"),    # int
                            "is_end": audio_data.get("is_end"),            # bool
                        }
                    },
                    "Text": {
                        "data": text_data.get("text"),  # base64 encoded data
                        "metadata": {
                            "chat_status": text_data.get("chat_status"),    # string
                            "chat_history": text_data.get("chat_history"),  # list of dict
                        }
                    },
                },
                "META_INFO": {
                    # "model_version": kwargs.get("model_version", ""),  # string
                    # "model_url": kwargs.get("model_url", ""),          # string
                    "character": {
                        "name": kwargs.get("character", "Klee"),                # string
                        "speaker_id": kwargs.get("speaker_id", 113),            # int
                        "wakeup_words": kwargs.get("wakeup_words", "可莉来啦"),  # string
                    }
                }
            }
        ) + '\n'

    def send_data_to_server(self, **kwargs):
        return requests.post(self.server_url,
                             data=self.gen_request_data(**kwargs), stream=True)


# ############################################ #
# ############ WebSocket Client ############# #
def check_audio_type(data, return_type='base64'):
    '''
    Convert audio data to the requested encoding.
    '''
    assert return_type in ['bytes', 'base64']
    if return_type == 'base64':
        if isinstance(data, bytes):
            return encode_bytes2str(data)
        return data  # already a base64 string
    elif return_type == 'bytes':
        if isinstance(data, str):
            return decode_str2bytes(data)
        elif isinstance(data, bytes):
            return data
        else:
            raise ValueError('Invalid data type: {}.'.format(type(data)))

import websocket
from websocket import create_connection

class BaseWebSocketClient:
    def __init__(self, server_url, session_id):
        self.server_url = server_url
        self.session_id = session_id

    def wakeup_client(self):
        '''
        Start the client.
        '''
        self.websocket = create_connection(self.server_url)

    def send_per_data(self,
                      text: str = '',
                      audio: bytes = b'',
                      stream: bool = True,
                      voice_synthesize: bool = False,
                      is_end: bool = False,
                      encoding: str = 'base64',
                      ):
        '''
        Send one message to the server.

        Args:
            text: str, text to send.
            audio: bytes, audio data to send (converted to `encoding`).
        '''
        self.websocket.send(json.dumps({
            "text": text,
            "audio": check_audio_type(audio, return_type=encoding),
            "meta_info": {
                "session_id": self.session_id,
                "stream": stream,
                "voice_synthesize": voice_synthesize,
                "is_end": is_end,
                "encoding": encoding,
            }}))

    def receive_per_data(self):
        try:
            recv_data = self.websocket.recv()
        except websocket._exceptions.WebSocketConnectionClosedException:
            return None, None
        try:
            recv_data = json.loads(recv_data)
        except json.JSONDecodeError:
            # not JSON; fall through and return the raw payload
            pass
        except Exception:
            assert isinstance(recv_data, bytes), ValueError(f"Received data is not bytes, got {type(recv_data)}.")
        return recv_data, type(recv_data)
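

# A runnable sketch (an illustration; the URL and session id are placeholders,
# not values from the original project):
if __name__ == '__main__':
    client = BaseWebSocketClient('ws://localhost:8000/chat', session_id='demo')
    client.wakeup_client()
    client.send_per_data(text='hello', is_end=True)
    while True:
        data, dtype = client.receive_per_data()
        if data is None:  # connection closed by the server
            break
        print(dtype, data)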
|
|
@ -0,0 +1,513 @@
|
|||
# basic
|
||||
import io
|
||||
import time
|
||||
import json
|
||||
import random
|
||||
from collections import deque
|
||||
# log
|
||||
import logging
|
||||
import warnings
|
||||
# multiprocessing
|
||||
import queue
|
||||
import threading
|
||||
import multiprocessing
|
||||
# web request
|
||||
import requests
|
||||
import pyaudio
|
||||
# hot words detection
|
||||
import pvporcupine
|
||||
|
||||
from takway.apps.data_struct import QueueIterator
|
||||
from takway.common_utils import *
|
||||
from takway.audio_utils import PicovoiceRecorder
|
||||
from takway.clients.client_utils import CharacterClient
|
||||
from takway.audio_utils import AudioPlayer
|
||||
from takway.emo_utils import EmoVideoPlayer
|
||||
|
||||
|
||||
class LocalClinet:
|
||||
def __init__(self,
|
||||
server_args,
|
||||
recorder_args,
|
||||
video_args,
|
||||
emo_args,
|
||||
log_args):
|
||||
# server_args
|
||||
self.server_args = server_args
|
||||
# recorder_args
|
||||
self.recorder_args = recorder_args
|
||||
# video_args
|
||||
self.video_args = video_args
|
||||
# emo_args
|
||||
self.emo_args = emo_args
|
||||
# log_args
|
||||
self.log_args = log_args
|
||||
|
||||
# TODO: 设计多进程log queue
|
||||
self.logger_init()
|
||||
|
||||
|
||||
def logger_init(self):
|
||||
# log_args
|
||||
log_level = self.log_args['log_level']
|
||||
log_file = self.log_args['log_file']
|
||||
|
||||
if log_level == 'debug':
|
||||
log_level = logging.DEBUG
|
||||
elif log_level == 'info':
|
||||
log_level = logging.INFO
|
||||
|
||||
# logger
|
||||
self.logger = logging.getLogger('mylogger')
|
||||
self.logger.setLevel(log_level)
|
||||
# handler 创建一个handler,用于写入日志文件
|
||||
handler = logging.FileHandler(log_file)
|
||||
handler.setLevel(log_level)
|
||||
# stream handler 创建一个handler,用于输出到控制台
|
||||
console = logging.StreamHandler()
|
||||
console.setLevel(logging.INFO)
|
||||
|
||||
# 定义handler的输出格式(formatter)
|
||||
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
||||
handler.setFormatter(formatter)
|
||||
console.setFormatter(formatter)
|
||||
|
||||
# 添加handler
|
||||
self.logger.addHandler(handler)
|
||||
self.logger.addHandler(console)
|
||||
self.logger.info("Logger started.")
|
||||
|
||||
def process_init(self):
|
||||
# multiprocessing
|
||||
manager = multiprocessing.Manager()
|
||||
self.trigger_queue = manager.Queue()
|
||||
self.client_queue = manager.Queue()
|
||||
self.audio_play_queue = manager.Queue()
|
||||
self.emo_display_queue = manager.Queue()
|
||||
|
||||
self.share_time_dict = manager.dict()
|
||||
|
||||
self.speaking_emo_event = manager.Event()
|
||||
|
||||
processes = [
|
||||
multiprocessing.Process(target=self.audio_process,
|
||||
args=(self.trigger_queue,self.client_queue)),
|
||||
# multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue,self.client_queue)),
|
||||
multiprocessing.Process(target=self.local_client_process,
|
||||
args=(self.client_queue,self.audio_play_queue,self.emo_display_queue, self.share_time_dict)),
|
||||
multiprocessing.Process(target=self.audio_play_process,
|
||||
args=(self.audio_play_queue,self.share_time_dict)),
|
||||
]
|
||||
if self.emo_args.pop('enable'):
|
||||
processes.append(
|
||||
multiprocessing.Process(target=self.emo_display_process, args=(self.emo_display_queue,)),
|
||||
)
|
||||
|
||||
for process in processes:
|
||||
process.start()
|
||||
for process in processes:
|
||||
process.join()
|
||||
|
||||
def audio_process(self,
|
||||
trigger_queue,
|
||||
client_queue):
|
||||
"""audio_process
|
||||
|
||||
Args:
|
||||
trigger_queue: multiprocessing.Queue, trigger queue
|
||||
client_queue: multiprocessing.Queue, client queue
|
||||
"""
|
||||
|
||||
self.frame_chunk_size = self.recorder_args.pop('frame_chunk_size')
|
||||
self.min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
|
||||
voice_trigger = self.recorder_args.pop('voice_trigger')
|
||||
self.RATE = self.recorder_args['RATE']
|
||||
|
||||
recorder = PicovoiceRecorder(**self.recorder_args)
|
||||
|
||||
# shared data struct:
|
||||
self.shared_waiting = False
|
||||
self.shared_lock = threading.Lock()
|
||||
self.shared_data_lock = threading.Lock()
|
||||
|
||||
# create threads
|
||||
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
|
||||
if voice_trigger:
|
||||
vioce_threads = [
|
||||
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
|
||||
]
|
||||
threads.extend(vioce_threads)
|
||||
for thread in threads:
|
||||
thread.start()
|
||||
self.logger.info("Audio Process started.")
|
||||
|
||||
while True:
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
print(f"audio process exit") ; exit()
|
||||
|
||||
|
||||
def hardware_trigger_thread(self, recorder):
|
||||
"""hardware_trigger_thread
|
||||
|
||||
Args:
|
||||
recorder: takway.audio_utils.Recorder, recorder object
|
||||
"""
|
||||
self.logger.info("Hardware trigger thread started.")
|
||||
|
||||
trgrigger_status = False
|
||||
while True:
|
||||
if self.shared_waiting:
|
||||
continue
|
||||
|
||||
# init status buffer
|
||||
is_bgn = True
|
||||
_frames = 0
|
||||
_total_frames = 0
|
||||
frames = []
|
||||
full_frames = []
|
||||
|
||||
print("Waiting for button press...")
|
||||
recorder.wait_for_hardware_pressed()
|
||||
print("Button pressed.")
|
||||
# stop voice trigger thread
|
||||
with self.shared_data_lock:
|
||||
self.shared_waiting = True # shared_waiting 控制所有线程的待机状态,True表示待机,False表示工作
|
||||
|
||||
print("Start recording...")
|
||||
bg_t = time.time()
|
||||
record_chunk_size = recorder.hd_chunk_size
|
||||
while True:
|
||||
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=record_chunk_size,
|
||||
return_type=None,
|
||||
exception_on_overflow=False)
|
||||
|
||||
frames.append(data)
|
||||
full_frames.append(data)
|
||||
_total_frames += 1
|
||||
|
||||
if not recorder.is_hardware_pressed:
|
||||
break
|
||||
|
||||
stream_reset_status = self.stream_record(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=False)
|
||||
if stream_reset_status:
|
||||
full_frames.clear()
|
||||
is_bgn = False
|
||||
|
||||
self.stream_record(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=True)
|
||||
|
||||
print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.")
|
||||
# recorder.write_wave_io(f"record_{int(bg_t)}.wav", frames); print(f"write record_{int(bg_t)}.wav")
|
||||
|
||||
with self.shared_data_lock:
|
||||
self.shared_waiting = False # 恢复voice trigger线程工作
|
||||
|
||||
def voice_trigger_thread(self, recorder):
|
||||
"""voice_trigger_thread
|
||||
|
||||
Args:
|
||||
recorder: takway.audio_utils.Recorder, recorder object
|
||||
"""
|
||||
self.logger.info("voice record thread started.")
|
||||
|
||||
print("Waiting for wake up...")
|
||||
while True:
|
||||
if self.shared_waiting:
|
||||
continue
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=recorder.porcupine.frame_length,
|
||||
return_type=None,
|
||||
exception_on_overflow=False,
|
||||
queue=None)
|
||||
|
||||
record_chunk_size = recorder.vad_chunk_size
|
||||
|
||||
if not recorder.is_wakeup(data):
|
||||
continue
|
||||
# wake up
|
||||
is_bgn = True
|
||||
_frames = 0
|
||||
_total_frames = 0
|
||||
frames = []
|
||||
full_frames = []
|
||||
# status buffer
|
||||
buffer_size = recorder.vad_buffer_size
|
||||
active_buffer = deque(maxlen=buffer_size)
|
||||
bg_t = time.time()
|
||||
print("Start recording...")
|
||||
while True:
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=record_chunk_size,
|
||||
return_type=None,
|
||||
exception_on_overflow=False)
|
||||
if data is None:
|
||||
continue
|
||||
|
||||
is_speech = recorder.is_speech(data)
|
||||
if is_speech:
|
||||
_frames += 1
|
||||
frames.append(data)
|
||||
print("add vad frame")
|
||||
_total_frames += 1
|
||||
full_frames.append(data)
|
||||
|
||||
# send chunk data to client
|
||||
stream_reset_status = self.stream_record(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=False)
|
||||
if stream_reset_status:
|
||||
full_frames.clear()
|
||||
is_bgn = False
|
||||
|
||||
# a deque created with maxlen discards its oldest entry on append, so no manual popleft is needed
active_buffer.append(bool(is_speech))
# keep recording until the last buffer_size chunks were all silent
if active_buffer.count(False) != active_buffer.maxlen:
continue
|
||||
if time.time() - bg_t > recorder.min_act_time:
|
||||
# end recording
|
||||
self.stream_record(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=True)
|
||||
print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid rate: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
|
||||
print("End recording.")
|
||||
break
|
||||
|
||||
|
||||
|
||||
def stream_record(self,
|
||||
bytes_frames: bytes,
|
||||
frames_size: int,
|
||||
record_chunk_size: int,
|
||||
is_bgn: bool,
|
||||
is_end: bool):
|
||||
'''
|
||||
Args:
|
||||
bytes_frames: bytes, audio data
|
||||
frames_size: int, audio data size
|
||||
record_chunk_size: int, audio data chunk size
|
||||
is_bgn: bool, is begin of stream
|
||||
is_end: bool, is end of stream
|
||||
|
||||
Returns:
|
||||
bool, True if the buffered frames were flushed and the caller should reset its buffer
|
||||
'''
|
||||
if len(bytes_frames) == 0:
|
||||
return False
|
||||
if frames_size*record_chunk_size >= self.min_stream_record_time*self.RATE or is_end:
|
||||
if is_bgn and is_end:
|
||||
return False
|
||||
stream_data = dict(
|
||||
frames=bytes_frames,
|
||||
frames_size=frames_size,
|
||||
chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=is_end)
|
||||
self.client_queue.put(('audio', stream_data))
|
||||
if is_end:
|
||||
print("put None to client queue.")
|
||||
self.client_queue.put(None)
|
||||
return True
|
||||
else:
|
||||
return False
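# A minimal worked example of the flush rule above (numbers assumed, not from
# the original config): with RATE = 16000 and min_stream_record_time = 1.0,
# ten buffered chunks of 1600 samples reach 10 * 1600 >= 1.0 * 16000, so the
# buffer is sent and cleared; any short final tail still goes out via is_end.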
|
||||
|
||||
def camera_process(self, logger, trigger_queue, client_queue):
|
||||
from takway.cam_utils import Camera
|
||||
cam = Camera(self.video_args)
|
||||
while True:
|
||||
if trigger_queue.empty():
|
||||
time.sleep(0.5)
|
||||
else:
|
||||
item = trigger_queue.get()
|
||||
if item[0] == 'trgrigger_status' and item[1]:
|
||||
_, frame = cam.read()
|
||||
client_queue.put(('image', frame))
|
||||
|
||||
|
||||
def local_client_process(self,
|
||||
client_queue,
|
||||
audio_play_queue,
|
||||
emo_display_queue,
|
||||
share_time_dict):
|
||||
'''
|
||||
Args:
|
||||
client_queue: multiprocessing.Queue, client queue
|
||||
audio_play_queue: multiprocessing.Queue, audio play queue
|
||||
emo_display_queue: multiprocessing.Queue, emo display queue
|
||||
share_time_dict: multiprocessing.Manager.dict, shared time dict
|
||||
'''
|
||||
character = self.server_args.pop('character')
|
||||
client = CharacterClient(**self.server_args)
|
||||
|
||||
# print(f"-------------------{character}-------------------")
|
||||
# print(f"client.chat_status: {client.chat_status}")
|
||||
if client.chat_status == 'init':
|
||||
client.set_character(character)
|
||||
client.chat_status = 'chating'
|
||||
# print(f"client.chat_history: {client.chat_history}")
|
||||
|
||||
self.logger.info("Local client process started.")
|
||||
|
||||
while True:
|
||||
response = None
|
||||
if self.client_queue.empty():
|
||||
continue
|
||||
try:
|
||||
response = requests.post(client.server_url, stream=True,
|
||||
data=self.generate_stream_queue_data(client, client_queue))
|
||||
print("get response from server.")
|
||||
self.get_stream_response(client, response, audio_play_queue, emo_display_queue)
|
||||
except (requests.exceptions.ConnectionError, ConnectionError):
print("Waiting for server connection...")
except requests.exceptions.Timeout as e:
print(f"Timeout: {e}")
|
||||
except requests.exceptions.ChunkedEncodingError:
|
||||
print("ChunkedEncodingError")
|
||||
|
||||
def generate_stream_queue_data(self, client, client_queue, **kwargs):
|
||||
_i = 0
|
||||
for queue_data in QueueIterator(client_queue):
|
||||
if queue_data[0] == 'audio':
|
||||
_i += 1
|
||||
if _i == 1:
|
||||
self.share_time_dict['client_time'] = [time.time()]
|
||||
else:
|
||||
self.share_time_dict['client_time'].append(time.time())
|
||||
audio_data = queue_data[1]
|
||||
print("send audio data to server...")
|
||||
# print(f"local chat history: {client.chat_history}")
|
||||
yield client.gen_request_data(
|
||||
audio_data=audio_data,
|
||||
chat_data=dict(
|
||||
chat_history=client.chat_history,
|
||||
chat_status=client.chat_status),
|
||||
character_data=client.character_info)
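# Passing a generator as `data=` makes requests send a chunked request body,
# so each audio payload leaves as soon as it is yielded. A minimal sketch of
# the pattern (the URL is hypothetical, not from this project):
#
#   def gen():
#       for payload in (b'one', b'two'):
#           yield payload
#   requests.post("http://example.com/upload", data=gen())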
|
||||
|
||||
def get_stream_response(self,
|
||||
client,
|
||||
response,
|
||||
audio_play_queue=None,
|
||||
emo_display_queue=None,
|
||||
chunk_size=1024):
|
||||
'''
|
||||
Args:
|
||||
client: takway.client_utils.CharacterClient, client object
|
||||
response: requests.Response, response object
|
||||
audio_play_queue: multiprocessing.Queue, audio play queue
|
||||
emo_display_queue: multiprocessing.Queue, emo display queue
|
||||
chunk_size: int, chunk size
|
||||
'''
|
||||
assert isinstance(response, requests.Response), \
|
||||
f"response is not requests.Response, but {type(response)}"
|
||||
|
||||
temp_data = '' # init temp_data
|
||||
|
||||
if response.status_code == 200:
|
||||
print("get response from server successfully.")
|
||||
else:
|
||||
print(f"response error, status code: {response.status_code}")
|
||||
|
||||
chat_llm_response = ''
|
||||
|
||||
_i = 0
|
||||
# for chunk in response.iter_lines():
|
||||
# if chunk:
|
||||
for chunk in response.iter_content(chunk_size=chunk_size):
|
||||
temp_data += chunk.decode('utf-8')
|
||||
if temp_data.endswith('\n'):
|
||||
_i += 1
|
||||
try:
|
||||
temp_json = json.loads(temp_data.rstrip('\n'))
|
||||
# phase 1: get audio data
|
||||
audio_play_queue.put(('server_data', temp_json['audio_output']['tts_stream_data']))
|
||||
# phase 2: get chat data
|
||||
chat_llm_response += temp_json['chat_output']['llm_stream_data']
|
||||
|
||||
if temp_json['is_end']:
|
||||
client.update_chat_history(question=temp_json['chat_output']['question'],
|
||||
response=chat_llm_response, asw_prompt_id=1)
|
||||
# print(f"chat_history: {client.chat_history}")
|
||||
if _i == 1:
|
||||
emo_display_queue.put(('emo_data', '高兴')) # '高兴' = "happy"
|
||||
except json.JSONDecodeError:
|
||||
print(f"json decode error: {temp_data}")
|
||||
temp_data = ''
|
||||
# print("get response.")
|
||||
print("End get response.")
|
||||
|
||||
def audio_play_process(self,
|
||||
audio_play_queue,
|
||||
share_time_dict):
|
||||
'''
|
||||
Args:
|
||||
audio_play_queue: multiprocessing.Queue, audio play queue
|
||||
share_time_dict: multiprocessing.Manager.dict, shared time dict
|
||||
'''
|
||||
audio_player = AudioPlayer()
|
||||
self.logger.info("Audio play process started.")
|
||||
while True:
|
||||
self.speaking_emo_event.clear()
|
||||
item = audio_play_queue.get()
|
||||
self.speaking_emo_event.set() # stop emo random display
|
||||
if item[0] == 'server_data':
|
||||
# play the audio
|
||||
print("Playing audio...")
|
||||
tts_audio = item[1]
|
||||
print(f"wait time: {(time.time() - self.share_time_dict['client_time'][0])*1000:.2f} ms")
|
||||
try:
|
||||
audio_player.play(tts_audio)
|
||||
except TypeError as e:
|
||||
# print(f"audio play error: {e}")
|
||||
# print(f"tts_audio: {tts_audio}")
|
||||
# print(f"type tts_audio: {type(tts_audio)}")
|
||||
# tts_audio: <class 'NoneType'>
|
||||
continue
|
||||
|
||||
|
||||
def emo_display_process(self, emo_display_queue):
|
||||
'''
|
||||
Args:
|
||||
emo_display_queue: multiprocessing.Queue, emo display queue
|
||||
'''
|
||||
emo_player = EmoVideoPlayer(**self.emo_args)
|
||||
self.logger.info("Emo display process started.")
|
||||
while True:
|
||||
if emo_display_queue.empty():
|
||||
time.sleep(0.1)
|
||||
if self.speaking_emo_event.is_set():
|
||||
continue
|
||||
emo_player.random_wink()
|
||||
else:
|
||||
item = emo_display_queue.get()
|
||||
print(f"Emo display process Get item: {item[0]}")
|
||||
if item[0] == 'emo_data':
|
||||
server_data = item[1]
|
||||
print("Displaying emo...")
|
||||
emo_player.display_emo(emo_name='兴奋', stage='start')
|
||||
emo_player.display_emo(emo_name='兴奋', stage='loop')
|
||||
emo_player.display_emo(emo_name='兴奋', stage='end')
|
||||
print("Display done.")
|
||||
time.sleep(15)
|
|
@ -0,0 +1,330 @@
|
|||
# basic
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import random
|
||||
from collections import deque
|
||||
from datetime import datetime
|
||||
# log
|
||||
import logging
|
||||
import warnings
|
||||
# multiprocessing
|
||||
import queue
|
||||
import threading
|
||||
import multiprocessing
|
||||
# web request
|
||||
import requests
|
||||
import pyaudio
|
||||
# hot words detection
|
||||
import pvporcupine
|
||||
|
||||
from takway.apps.data_struct import QueueIterator
|
||||
from takway.common_utils import *
|
||||
from takway.audio_utils import PicovoiceRecorder, HDRecorder
|
||||
from takway.clients.client_utils import BaseWebSocketClient
|
||||
from takway.audio_utils import AudioPlayer
|
||||
|
||||
|
||||
class WebSocketClinet:
|
||||
def __init__(self,
|
||||
board,
|
||||
server_args,
|
||||
recorder_args,
|
||||
player_args,
|
||||
log_args,
|
||||
excute_args=None,
|
||||
):
|
||||
self.board = board
|
||||
# server_args
|
||||
self.server_args = server_args
|
||||
# recorder_args
|
||||
self.recorder_args = recorder_args
|
||||
# player_args
|
||||
self.player_args = player_args
|
||||
# excute_args
|
||||
self.excute_args = excute_args
|
||||
# log_args
|
||||
self.log_args = log_args
|
||||
|
||||
|
||||
def process_init(self):
|
||||
# multiprocessing
|
||||
manager = multiprocessing.Manager()
|
||||
self.trigger_queue = manager.Queue()
|
||||
self.client_queue = manager.Queue()
|
||||
self.audio_play_queue = manager.Queue()
|
||||
self.excute_queue = manager.Queue()
|
||||
|
||||
# multiprocessing event flags
|
||||
self.mircophone_active_set = manager.Event()
|
||||
self.speaker_active_set = manager.Event()
|
||||
|
||||
processes = [
|
||||
multiprocessing.Process(target=self.audio_process),
|
||||
multiprocessing.Process(target=self.web_socket_client_process),
|
||||
multiprocessing.Process(target=self.audio_play_process),
|
||||
]
|
||||
if self.excute_args.get('enable', False):
|
||||
processes.append(
|
||||
multiprocessing.Process(target=self.excute_process),
|
||||
)
|
||||
|
||||
for process in processes:
|
||||
time.sleep(0.5)
|
||||
process.start()
|
||||
for process in processes:
|
||||
process.join()
|
||||
|
||||
def audio_process(self):
|
||||
"""audio_process
|
||||
|
||||
Args:
|
||||
trigger_queue: multiprocessing.Queue, trigger queue
|
||||
client_queue: multiprocessing.Queue, client queue
|
||||
"""
|
||||
min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
|
||||
voice_trigger = self.recorder_args.pop('voice_trigger')
|
||||
# TODO:
|
||||
press_type = self.recorder_args.pop('press_type')
|
||||
max_slience_time = self.recorder_args.pop('max_slience_time')
|
||||
if voice_trigger:
|
||||
recorder = PicovoiceRecorder(**self.recorder_args)
|
||||
else:
|
||||
voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path']
|
||||
for key in voice_keys:
|
||||
self.recorder_args.pop(key)
|
||||
recorder = HDRecorder(**self.recorder_args)
|
||||
recorder.min_stream_record_time = min_stream_record_time
|
||||
# TODO:
|
||||
recorder.press_type = press_type
|
||||
recorder.max_slience_time = max_slience_time
|
||||
|
||||
print("Audio Process started.")
|
||||
|
||||
print("Waiting for wake up...")
|
||||
# recorder.hardware.set_led_on("green")
|
||||
while True:
|
||||
if self.shared_waiting:
|
||||
continue
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=recorder.porcupine.frame_length,
|
||||
return_type=None,
|
||||
exception_on_overflow=False,
|
||||
queue=None)
|
||||
|
||||
record_chunk_size = recorder.vad_chunk_size
|
||||
|
||||
# woken up by the power button or by the wake word
|
||||
if recorder.hardware.power_status or recorder.is_wakeup(data):
|
||||
# recorder.hardware.set_led_on("blue")
|
||||
pass
|
||||
else:
|
||||
continue
|
||||
# wake up
|
||||
is_bgn = True
|
||||
is_end = False
|
||||
frames = []
|
||||
# status buffer
|
||||
silence_bgn_t = time.time()
|
||||
silence_time = 0
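# two silence thresholds drive the loop below: min_act_time closes a single
# utterance, while max_slience_time (or a released power button) ends the session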
|
||||
print("Start recording...")
|
||||
# enter the conversation state
|
||||
while True:
|
||||
# voice activity detection
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=record_chunk_size,
|
||||
return_type=None,
|
||||
exception_on_overflow=False)
|
||||
is_speech = recorder.is_speech(data)
|
||||
|
||||
# evaluate the VAD result
|
||||
if is_speech:
|
||||
print("valid voice")
|
||||
silence_bgn_t = time.time()
|
||||
frames.append(data)
|
||||
else:
|
||||
silence_time = time.time() - silence_bgn_t
|
||||
|
||||
# long silence ends the wake state: if nothing is said for too long after wake-up, or the power switch is turned off, treat the session as finished
|
||||
if silence_time > recorder.max_slience_time or not recorder.hardware.power_status:
|
||||
break
|
||||
|
||||
# short silence ends a single utterance: once silence exceeds a small window (about 0.5 s), flush the data
|
||||
if silence_time > recorder.min_act_time:
|
||||
is_end = True
|
||||
is_bgn = False
|
||||
|
||||
if not is_speech:
|
||||
continue
|
||||
|
||||
# stream the data out
|
||||
stream_reset_status = self.stream_record_process(
|
||||
bytes_frames=recorder.write_wave_bytes(frames),
|
||||
frames_size=len(frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
sample_rate=recorder.RATE,
|
||||
min_stream_record_time=recorder.min_stream_record_time,
|
||||
is_bgn=is_bgn,
|
||||
is_end=is_end)
|
||||
|
||||
if stream_reset_status:
|
||||
frames.clear()
|
||||
is_bgn = False
|
||||
|
||||
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
|
||||
# print("End recording.")
|
||||
# recorder.hardware.set_led_off("blue")
|
||||
|
||||
|
||||
def stream_record_process(self,
|
||||
bytes_frames: bytes,
|
||||
frames_size: int,
|
||||
record_chunk_size: int,
|
||||
sample_rate: int,
|
||||
min_stream_record_time: int,
|
||||
is_bgn: bool,
|
||||
is_end: bool):
|
||||
'''
|
||||
Args:
|
||||
bytes_frames: bytes, audio data
|
||||
frames_size: int, audio data size
|
||||
record_chunk_size: int, audio data chunk size
|
||||
is_bgn: bool, is begin of stream
|
||||
is_end: bool, is end of stream
|
||||
|
||||
Returns:
|
||||
bool, True if the buffered frames were flushed and the caller should reset its buffer
|
||||
'''
|
||||
if len(bytes_frames) == 0:
|
||||
return False
|
||||
if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end:
|
||||
if is_bgn and is_end:
|
||||
return False
|
||||
stream_data = dict(
|
||||
frames=bytes_frames,
|
||||
frames_size=frames_size,
|
||||
chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=is_end)
|
||||
self.client_queue.put(('audio', stream_data))
|
||||
if is_end:
|
||||
# print("put None to client queue.")
|
||||
self.client_queue.put(None)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def web_socket_client_process(self):
|
||||
|
||||
client = BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id'])
|
||||
print("Web socket client process started.")
|
||||
# print("Web socket client process started.")
|
||||
|
||||
while True:
|
||||
if self.client_queue.empty():
|
||||
continue
|
||||
|
||||
# print(f"init skt time: {datetime.now()}")
|
||||
# wake up the server session
|
||||
client.wakeup_client()
|
||||
|
||||
# send data
|
||||
for queue_data in QueueIterator(self.client_queue):
|
||||
if queue_data[0] == 'audio':
|
||||
audio_dict = queue_data[1]
|
||||
|
||||
client.send_per_data(
|
||||
audio=audio_dict['frames'],
|
||||
stream=True,
|
||||
voice_synthesize=True,
|
||||
is_end=audio_dict['is_end'],
|
||||
encoding='base64',
|
||||
)
|
||||
# print(f"send skt time: {datetime.now()}")
|
||||
# print(f"fnsh skt time: {datetime.now()}")
|
||||
|
||||
# receive data
|
||||
while True:
|
||||
response, data_type = client.receive_per_data()
|
||||
if data_type == dict:
|
||||
print(response) # print the received message
|
||||
'''
|
||||
try:
|
||||
response = json.loads(response['msg'])
|
||||
if 'content' in response.keys():
|
||||
self.excute_queue.put((response['instruct'], response['content']))
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"json decode error: {e}")
|
||||
continue
|
||||
# print(f"recv json time: {datetime.now()}")
|
||||
'''
|
||||
elif data_type == bytes:
|
||||
# print(f"recv bytes time: {datetime.now()}")
|
||||
self.audio_play_queue.put(('audio_bytes', response))
|
||||
elif data_type is None:
break # nothing more to receive; leave the loop
|
||||
# print("接收完毕:", datetime.now())
|
||||
|
||||
|
||||
def audio_play_process(self):
|
||||
'''
|
||||
Args:
|
||||
audio_play_queue: multiprocessing.Queue, audio play queue
|
||||
share_time_dict: multiprocessing.Manager.dict, shared time dict
|
||||
'''
|
||||
audio_player = AudioPlayer(**self.player_args)
|
||||
print("Audio play process started.")
|
||||
while True:
|
||||
item = self.audio_play_queue.get()
|
||||
# play the audio
|
||||
print("Playing audio...")
|
||||
tts_audio = item[1]
|
||||
print(f"tts_audio len: {len(tts_audio)}")
|
||||
print(f"play audio time: {datetime.now()}")
|
||||
try:
|
||||
# play
|
||||
self.speaker_active_set.set()
|
||||
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
|
||||
for i in range(0, len(tts_audio), audio_player.CHUNK):
|
||||
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
|
||||
# print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
|
||||
if self.mircophone_active_set.is_set():
|
||||
print("mirophone is active.")
|
||||
self.mircophone_active_set.wait()
|
||||
break
|
||||
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
|
||||
print(f"audio data played.")
|
||||
except TypeError as e:
|
||||
print(f"audio play error: {e}")
|
||||
continue
|
||||
|
||||
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
|
||||
# print(f"{item[0]} data played.")
|
||||
|
||||
|
||||
|
||||
|
||||
def excute_process(self):
|
||||
'''
|
||||
Args:
|
||||
excute_queue: multiprocessing.Queue, excute display queue
|
||||
'''
|
||||
print("Excute process started.")
|
||||
|
||||
while True:
|
||||
if self.excute_queue.empty():
|
||||
continue
|
||||
|
||||
if self.speaker_active_set.is_set():
|
||||
instruct, content = self.excute_queue.get()
|
||||
|
||||
print(f"Got speaker info: {instruct, content}")
|
||||
|
||||
print(f"Playing {instruct} {content}...")
|
||||
print(f"play {instruct} time: {datetime.now()}")
|
||||
self.audio_play_queue.put((instruct, content))
|
||||
|
||||
self.speaker_active_set.clear()
|
||||
|
|
@ -0,0 +1,464 @@
|
|||
# basic
|
||||
import io
|
||||
import os
|
||||
import sys
|
||||
import time
|
||||
import json
|
||||
import random
|
||||
from collections import deque
|
||||
from datetime import datetime
|
||||
# log
|
||||
import logging
|
||||
import warnings
|
||||
# multiprocessing
|
||||
import queue
|
||||
import threading
|
||||
import multiprocessing
|
||||
# web request
|
||||
import requests
|
||||
import pyaudio
|
||||
# hot words detection
|
||||
import pvporcupine
|
||||
|
||||
from takway.apps.data_struct import QueueIterator
|
||||
from takway.common_utils import *
|
||||
from takway.audio_utils import PicovoiceRecorder, HDRecorder
|
||||
from takway.clients.client_utils import BaseWebSocketClient
|
||||
from takway.audio_utils import AudioPlayer
|
||||
|
||||
|
||||
class WebSocketClinet:
|
||||
def __init__(self,
|
||||
board,
|
||||
server_args,
|
||||
recorder_args,
|
||||
player_args,
|
||||
log_args,
|
||||
excute_args=None,
|
||||
):
|
||||
self.board = board
|
||||
# server_args
|
||||
self.server_args = server_args
|
||||
# recorder_args
|
||||
self.recorder_args = recorder_args
|
||||
# player_args
|
||||
self.player_args = player_args
|
||||
# excute_args
|
||||
self.excute_args = excute_args
|
||||
# log_args
|
||||
self.log_args = log_args
|
||||
|
||||
|
||||
def process_init(self):
|
||||
# multiprocessing
|
||||
manager = multiprocessing.Manager()
|
||||
self.trigger_queue = manager.Queue()
|
||||
self.client_queue = manager.Queue()
|
||||
self.audio_play_queue = manager.Queue()
|
||||
self.excute_queue = manager.Queue()
|
||||
|
||||
# multiprocessing event flags
|
||||
self.mircophone_active_set = manager.Event()
|
||||
self.speaker_active_set = manager.Event()
|
||||
|
||||
processes = [
|
||||
multiprocessing.Process(target=self.audio_process),
|
||||
multiprocessing.Process(target=self.web_socket_client_process),
|
||||
multiprocessing.Process(target=self.audio_play_process),
|
||||
]
|
||||
if self.excute_args.get('enable', False):
|
||||
processes.append(
|
||||
multiprocessing.Process(target=self.excute_process),
|
||||
)
|
||||
|
||||
for process in processes:
|
||||
time.sleep(0.5)
|
||||
process.start()
|
||||
for process in processes:
|
||||
process.join()
|
||||
|
||||
def audio_process(self):
|
||||
"""audio_process
|
||||
|
||||
Args:
|
||||
trigger_queue: multiprocessing.Queue, trigger queue
|
||||
client_queue: multiprocessing.Queue, client queue
|
||||
"""
|
||||
min_stream_record_time = self.recorder_args.pop('min_stream_record_time')
|
||||
voice_trigger = self.recorder_args.pop('voice_trigger')
|
||||
press_type = self.recorder_args.pop('press_type')
|
||||
if voice_trigger:
|
||||
recorder = PicovoiceRecorder(**self.recorder_args)
|
||||
else:
|
||||
voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path']
|
||||
for key in voice_keys:
|
||||
self.recorder_args.pop(key)
|
||||
recorder = HDRecorder(**self.recorder_args)
|
||||
recorder.min_stream_record_time = min_stream_record_time
|
||||
recorder.press_type = press_type
|
||||
|
||||
# shared data struct:
|
||||
self.shared_waiting = False
|
||||
self.shared_lock = threading.Lock()
|
||||
self.shared_data_lock = threading.Lock()
|
||||
|
||||
# create threads
|
||||
threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
|
||||
if voice_trigger:
|
||||
voice_threads = [
threading.Thread(target=self.voice_trigger_thread, args=(recorder,)),
]
threads.extend(voice_threads)
|
||||
for thread in threads:
|
||||
thread.start()
|
||||
print("Audio Process started.")
|
||||
|
||||
while True:
|
||||
for thread in threads:
|
||||
thread.join()
|
||||
print(f"audio process exit") ; exit()
|
||||
|
||||
|
||||
def hardware_trigger_thread(self, recorder):
|
||||
"""hardware_trigger_thread
|
||||
|
||||
Args:
|
||||
recorder: takway.audio_utils.Recorder, recorder object
|
||||
"""
|
||||
print("Hardware trigger thread started.")
|
||||
|
||||
trigger_status = False
|
||||
record_chunk_size = recorder.hd_chunk_size
|
||||
|
||||
while True:
|
||||
if self.shared_waiting:
|
||||
continue
|
||||
|
||||
# init status buffer
|
||||
is_bgn = True
|
||||
frames = []
|
||||
_total_frames = 0
|
||||
|
||||
self.mircophone_active_set.clear()
|
||||
print("Waiting for button press...")
|
||||
recorder.wait_for_hardware_pressed()
|
||||
print("Button pressed.")
|
||||
self.mircophone_active_set.set()
|
||||
# stop voice trigger thread
|
||||
with self.shared_data_lock:
|
||||
self.shared_waiting = True # shared_waiting puts every other thread on standby: True = standby, False = working
|
||||
|
||||
print("Start recording...")
|
||||
bg_t = time.time()
|
||||
while True:
|
||||
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=record_chunk_size,
|
||||
return_type=None,
|
||||
exception_on_overflow=False)
|
||||
|
||||
frames.append(data)
|
||||
_total_frames += 1
|
||||
|
||||
if not recorder.is_hardware_pressed:
|
||||
# print("Button released.")
|
||||
print(f"button rlse time: {datetime.now()}")
|
||||
break
|
||||
|
||||
stream_reset_status = self.stream_record_process(
|
||||
bytes_frames=recorder.write_wave_bytes(frames),
|
||||
frames_size=len(frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
sample_rate=recorder.RATE,
|
||||
min_stream_record_time=recorder.min_stream_record_time,
|
||||
is_bgn=is_bgn,
|
||||
is_end=False)
|
||||
if stream_reset_status:
|
||||
frames.clear()
|
||||
is_bgn = False
|
||||
|
||||
self.stream_record_process(
|
||||
bytes_frames=recorder.write_wave_bytes(frames),
|
||||
frames_size=len(frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
sample_rate=recorder.RATE,
|
||||
min_stream_record_time=recorder.min_stream_record_time,
|
||||
is_bgn=is_bgn,
|
||||
is_end=True)
|
||||
|
||||
# print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.")
|
||||
# print(f"rcrd time: {datetime.now()}")
|
||||
|
||||
with self.shared_data_lock:
|
||||
self.shared_waiting = False # let the voice trigger thread resume
|
||||
|
||||
def voice_trigger_thread(self, recorder):
|
||||
"""voice_trigger_thread
|
||||
|
||||
Args:
|
||||
recorder: takway.audio_utils.Recorder, recorder object
|
||||
"""
|
||||
print("voice record thread started.")
|
||||
|
||||
print("Waiting for wake up...")
|
||||
while True:
|
||||
if self.shared_waiting:
|
||||
continue
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=recorder.porcupine.frame_length,
|
||||
return_type=None,
|
||||
exception_on_overflow=False,
|
||||
queue=None)
|
||||
|
||||
record_chunk_size = recorder.vad_chunk_size
|
||||
|
||||
self.mircophone_active_set.clear()
|
||||
if not recorder.is_wakeup(data):
|
||||
continue
|
||||
|
||||
if self.board == 'orangepi':
|
||||
recorder.hardware.set_led2_on()
|
||||
self.mircophone_active_set.set()
|
||||
# wake up
|
||||
is_bgn = True
|
||||
_frames = 0
|
||||
_total_frames = 0
|
||||
frames = []
|
||||
full_frames = []
|
||||
# status buffer
|
||||
buffer_size = recorder.vad_buffer_size
|
||||
active_buffer = deque(maxlen=buffer_size)
|
||||
bg_t = time.time()
|
||||
print("Start recording...")
|
||||
while True:
|
||||
data = recorder.record_chunk_voice(
|
||||
CHUNK=record_chunk_size,
|
||||
return_type=None,
|
||||
exception_on_overflow=False)
|
||||
if data is None:
|
||||
continue
|
||||
|
||||
is_speech = recorder.is_speech(data)
|
||||
if is_speech:
|
||||
_frames += 1
|
||||
frames.append(data)
|
||||
# print("add vad frame")
|
||||
_total_frames += 1
|
||||
full_frames.append(data)
|
||||
|
||||
# send chunk data to client
|
||||
stream_reset_status = self.stream_record_process(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
sample_rate=recorder.RATE,
|
||||
min_stream_record_time=recorder.min_stream_record_time,
|
||||
is_bgn=is_bgn,
|
||||
is_end=False)
|
||||
if stream_reset_status:
|
||||
full_frames.clear()
|
||||
is_bgn = False
|
||||
|
||||
# a deque created with maxlen discards its oldest entry on append, so no manual popleft is needed
active_buffer.append(bool(is_speech))
# keep recording until the last buffer_size chunks were all silent
if active_buffer.count(False) != active_buffer.maxlen:
continue
|
||||
if time.time() - bg_t > recorder.min_act_time:
|
||||
# end recording
|
||||
self.stream_record_process(
|
||||
bytes_frames=recorder.write_wave_bytes(full_frames),
|
||||
frames_size=len(full_frames),
|
||||
record_chunk_size=record_chunk_size,
|
||||
sample_rate=recorder.RATE,
|
||||
min_stream_record_time=recorder.min_stream_record_time,
|
||||
is_bgn=is_bgn,
|
||||
is_end=True)
|
||||
# print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.")
|
||||
# print("End recording.")
|
||||
break
|
||||
if self.board == 'orangepi':
|
||||
recorder.hardware.set_led2_off()
|
||||
|
||||
|
||||
def stream_record_process(self,
|
||||
bytes_frames: bytes,
|
||||
frames_size: int,
|
||||
record_chunk_size: int,
|
||||
sample_rate: int,
|
||||
min_stream_record_time: int,
|
||||
is_bgn: bool,
|
||||
is_end: bool):
|
||||
'''
|
||||
Args:
|
||||
bytes_frames: bytes, audio data
|
||||
frames_size: int, audio data size
|
||||
record_chunk_size: int, audio data chunk size
|
||||
is_bgn: bool, is begin of stream
|
||||
is_end: bool, is end of stream
|
||||
|
||||
Returns:
|
||||
bool, True if the buffered frames were flushed and the caller should reset its buffer
|
||||
'''
|
||||
if len(bytes_frames) == 0:
|
||||
return False
|
||||
if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end:
|
||||
if is_bgn and is_end:
|
||||
return False
|
||||
stream_data = dict(
|
||||
frames=bytes_frames,
|
||||
frames_size=frames_size,
|
||||
chunk_size=record_chunk_size,
|
||||
is_bgn=is_bgn,
|
||||
is_end=is_end)
|
||||
self.client_queue.put(('audio', stream_data))
|
||||
if is_end:
|
||||
# print("put None to client queue.")
|
||||
self.client_queue.put(None)
|
||||
return True
|
||||
else:
|
||||
return False
|
||||
|
||||
def web_socket_client_process(self):
|
||||
|
||||
client = BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id'])
|
||||
print("Web socket client process started.")
|
||||
# print("Web socket client process started.")
|
||||
|
||||
while True:
|
||||
if self.client_queue.empty():
|
||||
continue
|
||||
|
||||
# print(f"init skt time: {datetime.now()}")
|
||||
# wake up the server session
|
||||
client.wakeup_client()
|
||||
|
||||
# send data
|
||||
for queue_data in QueueIterator(self.client_queue):
|
||||
if queue_data[0] == 'audio':
|
||||
audio_dict = queue_data[1]
|
||||
|
||||
client.send_per_data(
|
||||
audio=audio_dict['frames'],
|
||||
stream=True,
|
||||
voice_synthesize=True,
|
||||
is_end=audio_dict['is_end'],
|
||||
encoding='base64',
|
||||
)
|
||||
# print(f"send skt time: {datetime.now()}")
|
||||
# print(f"fnsh skt time: {datetime.now()}")
|
||||
|
||||
# receive data
|
||||
while True:
|
||||
response, data_type = client.receive_per_data()
|
||||
if data_type == dict:
|
||||
print(response) # print the received message
|
||||
'''
|
||||
try:
|
||||
response = json.loads(response['msg'])
|
||||
if 'content' in response.keys():
|
||||
self.excute_queue.put((response['instruct'], response['content']))
|
||||
except json.JSONDecodeError as e:
|
||||
print(f"json decode error: {e}")
|
||||
continue
|
||||
# print(f"recv json time: {datetime.now()}")
|
||||
'''
|
||||
elif data_type == bytes:
|
||||
# print(f"recv bytes time: {datetime.now()}")
|
||||
self.audio_play_queue.put(('audio_bytes', response))
|
||||
elif data_type is None:
break # nothing more to receive; leave the loop
|
||||
# print("接收完毕:", datetime.now())
|
||||
|
||||
def audio_play_process(self):
|
||||
'''
|
||||
Args:
|
||||
audio_play_queue: multiprocessing.Queue, audio play queue
|
||||
share_time_dict: multiprocessing.Manager.dict, shared time dict
|
||||
'''
|
||||
audio_player = AudioPlayer(**self.player_args)
|
||||
print("Audio play process started.")
|
||||
while True:
|
||||
item = self.audio_play_queue.get()
|
||||
if item[0] == 'audio_bytes':
|
||||
# play the audio
|
||||
print("Playing audio...")
|
||||
tts_audio = item[1]
|
||||
print(f"tts_audio len: {len(tts_audio)}")
|
||||
print(f"play audio time: {datetime.now()}")
|
||||
try:
|
||||
# play
|
||||
self.speaker_active_set.set()
|
||||
tts_audio = audio_player.check_audio_type(tts_audio, return_type=None)
|
||||
for i in range(0, len(tts_audio), audio_player.CHUNK):
|
||||
audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK])
|
||||
print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio)))
|
||||
if self.mircophone_active_set.is_set():
|
||||
print("mirophone is active.")
|
||||
self.mircophone_active_set.wait()
|
||||
break
|
||||
|
||||
audio_player.stream.write(tts_audio[i+audio_player.CHUNK:])
|
||||
|
||||
# save the bytes to a file (append mode)
|
||||
with open("chat_audio.txt", 'ab') as f:
|
||||
f.write(tts_audio)
|
||||
print("Audio saved.")
|
||||
print(f"audio data played.")
|
||||
except TypeError as e:
|
||||
print(f"audio play error: {e}")
|
||||
continue
|
||||
else:
|
||||
if item[0] == 'story':
|
||||
audio_data = audio_player.load_audio_file(f"/home/orangepi/story_22050/{item[1]}.wav")
|
||||
elif item[0] == 'music':
|
||||
audio_data = audio_player.load_audio_file("/home/orangepi/music_22050/1.wav")
|
||||
# play
|
||||
self.speaker_active_set.set()
|
||||
audio_data = audio_player.check_audio_type(audio_data, return_type=None)
|
||||
time.sleep(0.5)
|
||||
for i in range(0, len(audio_data), audio_player.CHUNK):
|
||||
audio_player.stream.write(audio_data[i:i+audio_player.CHUNK])
|
||||
print("Playing {} data...{}/{}".format(item[0], i, len(audio_data)))
|
||||
if self.mircophone_active_set.is_set():
|
||||
audio_player.close()
|
||||
print("Reinit audio player.")
|
||||
print("mirophone is active.")
|
||||
self.mircophone_active_set.wait()
|
||||
time.sleep(0.5)
|
||||
audio_player = AudioPlayer(**self.player_args)
|
||||
break
|
||||
|
||||
# audio_player.stream.write(audio_data[i+audio_player.CHUNK:])
|
||||
# print(f"{item[0]} data played.")
|
||||
|
||||
|
||||
|
||||
|
||||
def excute_process(self):
|
||||
'''
|
||||
Args:
|
||||
excute_queue: multiprocessing.Queue, excute display queue
|
||||
'''
|
||||
print("Excute process started.")
|
||||
|
||||
while True:
|
||||
if self.excute_queue.empty():
|
||||
continue
|
||||
|
||||
if self.speaker_active_set.is_set():
|
||||
instruct, content = self.excute_queue.get()
|
||||
|
||||
print(f"Got speaker info: {instruct, content}")
|
||||
|
||||
print(f"Playing {instruct} {content}...")
|
||||
print(f"play {instruct} time: {datetime.now()}")
|
||||
self.audio_play_queue.put((instruct, content))
|
||||
|
||||
self.speaker_active_set.clear()
|
||||
|
|
@ -0,0 +1,96 @@
|
|||
|
||||
# ############################################################# #
|
||||
# format table function
|
||||
# ############################################################# #
|
||||
|
||||
def format_table(header, rows):
|
||||
# compute the column width
col_width = max(len(str(word)) for row in rows for word in row) + 2 # longest cell + 2 as the column width
|
||||
# print the header
|
||||
print("".join(word.ljust(col_width) for word in header))
|
||||
# print the separator line
|
||||
print("".join("-" * col_width for _ in header))
|
||||
# print the rows
|
||||
for row in rows:
|
||||
print("".join(str(word).ljust(col_width) for word in row))
|
||||
|
||||
# ############################################################# #
|
||||
# encode and decode bytes and string
|
||||
# ############################################################# #
|
||||
|
||||
import base64
|
||||
def encode_bytes2str(data):
|
||||
# encode bytes as a Base64 string
|
||||
if data is None:
|
||||
return None
|
||||
return base64.b64encode(data).decode('utf-8')
|
||||
|
||||
def decode_str2bytes(data):
|
||||
# decode a Base64 string back into bytes
|
||||
if data is None:
|
||||
return None
|
||||
return base64.b64decode(data.encode('utf-8'))
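# round-trip sketch: decode_str2bytes(encode_bytes2str(b'\x00\x01')) == b'\x00\x01',
# which is what lets raw audio bytes travel inside JSON payloads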
|
||||
|
||||
import re
|
||||
def split_sentences(text: str):
|
||||
# regex covering common Chinese punctuation
|
||||
pattern = r'[\。\,\、\;\:\?\!\“\”\(\)\《\》]+'
|
||||
# split the string on the punctuation
|
||||
sentences = re.split(pattern, text)
|
||||
# drop empty fragments
|
||||
sentences = [sentence for sentence in sentences if sentence]
|
||||
return sentences
|
||||
'''
|
||||
# sample text
|
||||
text = "今天天气真好,我们去公园玩吧!你觉得怎么样?好的,那就这么定了。"
|
||||
# split it into sentences
|
||||
sentences = split_sentences(text)
|
||||
|
||||
print(sentences)
|
||||
'''
|
||||
|
||||
def split_chinese_text(text: str, return_patch=False, punctuations=None):
|
||||
# default set of Chinese punctuation marks; a caller-supplied `punctuations` overrides it
if punctuations is None:
punctuations = set('。!?,;:、“”()《》【】')
|
||||
# sentence segments plus the punctuation mark that closed each one
|
||||
sentences = []
|
||||
punctuation_list = []
|
||||
|
||||
text_patch = []
|
||||
|
||||
start = 0 # start of the current sentence
|
||||
for i, char in enumerate(text):
|
||||
if char in punctuations:
|
||||
# punctuation closes the current sentence; record the mark
|
||||
sentences.append(text[start:i+1])
|
||||
punctuation_list.append(char)
|
||||
start = i + 1 # advance to the start of the next sentence
|
||||
|
||||
# keep a trailing sentence that has no closing punctuation
|
||||
if start < len(text):
|
||||
sentences.append(text[start:])
|
||||
|
||||
|
||||
if return_patch:
|
||||
if len(punctuation_list) == 0:
|
||||
return [text], False # leftover text, no complete sentence yet
|
||||
elif len(sentences) == len(punctuation_list):
|
||||
return [''.join(sentences)], True
|
||||
else:
|
||||
return [''.join(sentences[:-1]), sentences[-1]], True
|
||||
return sentences, punctuation_list
|
||||
'''
|
||||
# sample text
|
||||
text = "你好,世界!今天天气怎么样?希望你有一个美好的一天。{}"
|
||||
sentences, punctuation_list = split_chinese_text(text)
|
||||
|
||||
print("断句结果:", sentences)
|
||||
print("标点符号列表:", punctuation_list)
|
||||
'''
|
||||
|
||||
def remove_brackets_and_contents(text):
|
||||
# use re.sub to replace the matched spans with the empty string
|
||||
result = re.sub(r'\(.*?\)', '', text) # half-width parentheses
result = re.sub(r'\(.*?\)', '', result) # full-width parentheses
result = re.sub(r'\【.*?\】', '', result) # full-width brackets
|
||||
return result
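# usage sketch: remove_brackets_and_contents("你好(旁白)world【注】") -> "你好world"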
|
|
@ -0,0 +1,176 @@
|
|||
import time
|
||||
import av
|
||||
import os
|
||||
import copy
|
||||
import random
|
||||
import numpy as np
|
||||
try:
|
||||
from maix import display, image
|
||||
print("import maix success.")
|
||||
except ImportError:
|
||||
import cv2
|
||||
print("import cv2 success.")
|
||||
|
||||
class EmoVideoPlayer:
|
||||
def __init__(self, player='maixsense', emo_dir='ResizedEmoji'):
|
||||
assert player in ['maixsense', 'opencv'], "player must be 'maixsense' or 'opencv'"
|
||||
self.player = player
|
||||
|
||||
self.emo_list = ['兴奋', '愤怒', '静态', '不屑', '惊恐', '难过']
|
||||
|
||||
self.emo_init(emo_dir)
|
||||
|
||||
def emo_init(self, emo_dir):
|
||||
# replace this with the actual path of the Emoji folder
|
||||
self.emo_av_dict = self.get_emo_av(emo_dir)
|
||||
self.emo_time_dict = {
|
||||
'兴奋': 0.00,
|
||||
'愤怒': 0.01,
|
||||
'静态': 0.01,
|
||||
'不屑': 0.01,
|
||||
'惊恐': 0.01,
|
||||
'难过': 0.01,
|
||||
}
|
||||
|
||||
def get_emo_av(self, emo_dir):
|
||||
emo_av_dict = {emo: dict() for emo in self.emo_list}
|
||||
for emo in self.emo_list:
|
||||
emo_path = os.path.join(emo_dir, emo)
|
||||
for file in os.listdir(emo_path):
|
||||
if not os.path.isfile(os.path.join(emo_path, file)):
|
||||
continue
|
||||
av_container = av.open(os.path.join(emo_path, file))
|
||||
if emo == '静态':
|
||||
if "单次眨眼偶发" in file:
|
||||
emo_av_dict[emo]['seldom_wink'] = av_container
|
||||
if "快速双眨眼偶发" in file:
|
||||
emo_av_dict[emo]['quick_wink'] = av_container
|
||||
else:
|
||||
if "进入姿势" in file:
|
||||
emo_av_dict[emo]['start'] = av_container
|
||||
elif "可循环动作" in file:
|
||||
emo_av_dict[emo]['loop'] = av_container
|
||||
elif "回正" in file:
|
||||
emo_av_dict[emo]['end'] = av_container
|
||||
self.av_info = emo_av_dict[emo]['loop'].streams.video[0]
|
||||
return emo_av_dict
|
||||
|
||||
def get_emo_frames(self, emo_dir):
|
||||
emo_av_dict = {emo: dict() for emo in self.emo_list}
|
||||
for emo in self.emo_list:
|
||||
emo_path = os.path.join(emo_dir, emo)
|
||||
for file in os.listdir(emo_path):
|
||||
if not os.path.isfile(os.path.join(emo_path, file)):
|
||||
continue
|
||||
av_container = av.open(os.path.join(emo_path, file))
|
||||
|
||||
frame_list = []
|
||||
av_info = av_container.streams.video[0]
|
||||
for frame in av_container.decode(video=0):
|
||||
if self.player == 'maixsense':
|
||||
img = image.load(bytes(frame.to_rgb().planes[0]), (av_info.width, av_info.height))
|
||||
elif self.player == 'opencv':
|
||||
img = cv2.cvtColor(np.array(frame.to_image()), cv2.COLOR_RGB2BGR)
|
||||
frame_list.append(img)
|
||||
# add to dict
|
||||
if emo == '静态':
|
||||
if "单次眨眼偶发" in file:
|
||||
emo_av_dict[emo]['seldom_wink'] = frame_list
|
||||
if "快速双眨眼偶发" in file:
|
||||
emo_av_dict[emo]['quick_wink'] = frame_list
|
||||
else:
|
||||
if "进入姿势" in file:
|
||||
emo_av_dict[emo]['start'] = frame_list
|
||||
elif "可循环动作" in file:
|
||||
emo_av_dict[emo]['loop'] = frame_list
|
||||
elif "回正" in file:
|
||||
emo_av_dict[emo]['end'] = frame_list
|
||||
return emo_av_dict
|
||||
|
||||
def display_emo_frame(self, emo_name, stage='default'):
|
||||
emo_frame_list = self.emo_av_dict[emo_name][stage]
|
||||
emo_time = self.emo_time_dict[emo_name]
|
||||
for img in emo_frame_list:
|
||||
if self.player == 'maixsense':
|
||||
display.show(img)
|
||||
elif self.player == 'opencv':
|
||||
cv2.imshow("video", img)
|
||||
cv2.waitKey(1) # you may need to tune this delay to the video frame rate
|
||||
time.sleep(emo_time)
|
||||
|
||||
def display_emo(self, emo_name, stage='default'):
|
||||
if self.player == 'maixsense':
|
||||
self.display_emo_maixsense(emo_name, stage)
|
||||
elif self.player == 'opencv':
|
||||
self.display_emo_opencv(emo_name, stage)
|
||||
|
||||
def display_emo_maixsense(self, emo_name, stage):
|
||||
emo_container = self.emo_av_dict[emo_name][stage]
|
||||
emo_time = self.emo_time_dict[emo_name]
|
||||
for frame in emo_container.decode(video=0):
|
||||
img = image.load(bytes(frame.to_rgb().planes[0]), (self.av_info.width, self.av_info.height))
|
||||
display.show(img)
|
||||
time.sleep(emo_time)
|
||||
emo_container.seek(0) # rewind the container for the next playback
|
||||
|
||||
def display_emo_opencv(self, emo_name, stage='default'):
|
||||
import cv2
|
||||
import numpy
|
||||
if stage == 'default':
|
||||
if emo_name == '静态':
|
||||
stage = 'quick_wink'
|
||||
else:
|
||||
stage = 'loop'
|
||||
emo_container = self.emo_av_dict[emo_name][stage]
|
||||
emo_time = self.emo_time_dict[emo_name]
|
||||
|
||||
for frame in emo_container.decode(video=0):
|
||||
img = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR)
|
||||
cv2.imshow("video", img)
|
||||
time.sleep(emo_time)
|
||||
cv2.waitKey(1) # you may need to tune this delay to the video frame rate
|
||||
cv2.destroyAllWindows()
|
||||
emo_container.seek(0) # rewind the container for the next playback
|
||||
|
||||
def get_emo_status(self, answer):
|
||||
# emotions: 兴奋 (excited), 愤怒 (angry), 静态 (idle), 不屑 (disdain), 惊恐 (terrified), 难过 (sad)
|
||||
if any([emo in answer for emo in self.emo_list]):
|
||||
# find which emotion name appears in the answer
|
||||
emo_status = [emo for emo in self.emo_list if emo in answer][0]
|
||||
print(f"emo_status: {emo_status}")
|
||||
else:
|
||||
emo_status = '静态'
|
||||
return emo_status
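# usage sketch: get_emo_status("我现在很兴奋!") returns '兴奋'; an answer with
# no listed emotion falls back to '静态'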
|
||||
|
||||
def random_wink(self):
|
||||
seed = random.randrange(0, 1000)
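# seed < 100 out of 1000 gives roughly a 10% chance per call to play the blink clip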
|
||||
if seed < 100:
|
||||
self.display_emo(emo_name='静态', stage='seldom_wink')
|
||||
# print("random wink")
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
emo = EmoVideoPlayer()
|
||||
# emo.display_emo_opencv(emo_name='兴奋', stage='start')
|
||||
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='兴奋', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='兴奋', stage='end')
|
||||
emo.display_emo_opencv(emo_name='静态', stage='seldom_wink')
|
||||
emo.display_emo_opencv(emo_name='静态', stage='quick_wink')
|
||||
# emo.display_emo_opencv(emo_name='愤怒', stage='start')
|
||||
# emo.display_emo_opencv(emo_name='愤怒', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='愤怒', stage='end')
|
||||
# emo.display_emo_opencv(emo_name='静态', stage='seldom_wink')
|
||||
# emo.display_emo_opencv(emo_name='静态', stage='quick_wink')
|
||||
# emo.display_emo_opencv(emo_name='不屑', stage='start')
|
||||
# emo.display_emo_opencv(emo_name='不屑', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='不屑', stage='end')
|
||||
# emo.display_emo_opencv(emo_name='惊恐', stage='start')
|
||||
# emo.display_emo_opencv(emo_name='惊恐', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='惊恐', stage='end')
|
||||
# emo.display_emo_opencv(emo_name='难过', stage='start')
|
||||
# emo.display_emo_opencv(emo_name='难过', stage='loop')
|
||||
# emo.display_emo_opencv(emo_name='难过', stage='end')
|
|
@ -0,0 +1,83 @@
|
|||
import os
|
||||
import struct
|
||||
import wave
|
||||
from datetime import datetime
|
||||
|
||||
import pvporcupine
|
||||
from pvrecorder import PvRecorder
|
||||
|
||||
|
||||
class PorcupineKeywordDetector:
|
||||
def __init__(self, access_key, keywords=None, keyword_paths=None, library_path=None, model_path=None, sensitivities=None, audio_device_index=-1, output_path=None):
|
||||
self.access_key = access_key
|
||||
self.keywords = keywords
if keyword_paths is None and keywords is not None:
# assumption: fall back to pvporcupine's built-in keyword table when only names are given
keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in keywords]
self.keyword_paths = keyword_paths if keyword_paths is not None else []
self.library_path = library_path
self.model_path = model_path
self.sensitivities = sensitivities if sensitivities is not None else [0.5] * len(self.keyword_paths)
|
||||
self.audio_device_index = audio_device_index
|
||||
self.output_path = output_path
|
||||
self.porcupine = None
|
||||
self.recorder = None
|
||||
self.wav_file = None
|
||||
|
||||
if len(self.keyword_paths) != len(self.sensitivities):
|
||||
raise ValueError('Number of keywords does not match the number of sensitivities.')
|
||||
|
||||
self._init_porcupine()
|
||||
|
||||
def _init_porcupine(self):
|
||||
try:
|
||||
self.porcupine = pvporcupine.create(
|
||||
access_key=self.access_key,
|
||||
library_path=self.library_path,
|
||||
model_path=self.model_path,
|
||||
keyword_paths=self.keyword_paths,
|
||||
sensitivities=self.sensitivities)
|
||||
except pvporcupine.PorcupineError as e:
|
||||
print("Failed to initialize Porcupine:", e)
|
||||
raise e
|
||||
|
||||
def start_detection(self):
|
||||
self.recorder = PvRecorder(frame_length=self.porcupine.frame_length, device_index=self.audio_device_index)
|
||||
self.recorder.start()
|
||||
|
||||
if self.output_path is not None:
|
||||
self.wav_file = wave.open(self.output_path, "w")
|
||||
self.wav_file.setnchannels(1)
|
||||
self.wav_file.setsampwidth(2)
|
||||
self.wav_file.setframerate(16000)
|
||||
|
||||
print('Listening ... (press Ctrl+C to exit)')
|
||||
self._run_detection_loop()
|
||||
|
||||
def _run_detection_loop(self):
|
||||
try:
|
||||
while True:
|
||||
pcm = self.recorder.read()
|
||||
result = self.porcupine.process(pcm)
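# process() returns the index of the detected keyword, or -1 if none was found in this frame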
|
||||
|
||||
if self.wav_file is not None:
|
||||
self.wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))
|
||||
|
||||
if result >= 0:
|
||||
print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result]))
|
||||
except KeyboardInterrupt:
|
||||
print('Stopping ...')
|
||||
finally:
|
||||
self.stop_detection()
|
||||
|
||||
def stop_detection(self):
|
||||
if self.recorder is not None:
|
||||
self.recorder.delete()
|
||||
if self.porcupine is not None:
|
||||
self.porcupine.delete()
|
||||
if self.wav_file is not None:
|
||||
self.wav_file.close()
|
||||
|
||||
# You can add more methods here as needed, such as a method to list audio devices.
|
||||
|
||||
# Usage example
|
||||
if __name__ == '__main__':
|
||||
detector = PorcupineKeywordDetector(access_key='hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==', keywords=['porcupine'])
|
||||
detector.start_detection()
|
|
@ -0,0 +1,10 @@
|
|||
import wave
|
||||
# read a wave file and print its sample rate, sample width and channel count,
# plus the length of the raw frame data
|
||||
with wave.open('output_1708083097.9604511.wav', 'rb') as f:
|
||||
data = f.readframes(f.getnframes())
|
||||
print(len(data))
|
||||
print(type(data))
|
||||
nchannels, sampwidth, framerate, nframes, comptype, compname = f.getparams()
|
||||
print(framerate, sampwidth, nchannels)
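# clip duration follows directly from the params
print(f"duration: {nframes / framerate:.2f} s")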
|
||||
|
|
@ -0,0 +1,31 @@
|
|||
import socket
|
||||
import requests
|
||||
|
||||
# get the private (LAN) IP address
|
||||
def get_private_ip():
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
|
||||
s.connect(("8.8.8.8", 80))
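# connecting a UDP socket sends no packets; it only selects the outbound
# interface, whose address getsockname() then reports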
|
||||
private_ip = s.getsockname()[0]
|
||||
s.close()
|
||||
return private_ip
|
||||
except Exception as e:
|
||||
print(f"Error getting private IP: {e}")
|
||||
return None
|
||||
|
||||
# get the public IP address
|
||||
def get_public_ip():
|
||||
try:
|
||||
response = requests.get("https://api.ipify.org")
|
||||
public_ip = response.text
|
||||
return public_ip
|
||||
except Exception as e:
|
||||
print(f"Error getting public IP: {e}")
|
||||
return None
|
||||
|
||||
if __name__ == "__main__":
|
||||
private_ip = get_private_ip()
|
||||
public_ip = get_public_ip()
|
||||
|
||||
print(f"Private IP: {private_ip}")
|
||||
print(f"Public IP: {public_ip}")
|
|
@ -0,0 +1,29 @@
|
|||
from takway.board import OrangePi
|
||||
import time
|
||||
|
||||
if __name__ == '__main__':
|
||||
orangepi = OrangePi()
|
||||
|
||||
orangepi.set_led_on("red")
|
||||
time.sleep(2)
|
||||
orangepi.set_led_off("red")
|
||||
|
||||
orangepi.set_led_on("blue")
|
||||
time.sleep(2)
|
||||
orangepi.set_led_off("blue")
|
||||
|
||||
orangepi.set_led_on("green")
|
||||
time.sleep(2)
|
||||
orangepi.set_led_off("green")
|
||||
|
||||
orangepi.set_led_on("yellow")
|
||||
time.sleep(2)
|
||||
orangepi.set_led_off("yellow")
|
||||
|
||||
orangepi.set_led_on("white")
|
||||
time.sleep(2)
|
||||
orangepi.set_led_off("white")
|
||||
|
||||
print("Wait for press key.")
|
||||
while True:
|
||||
time.sleep(1) # idle without burning the CPU
|
|
@ -0,0 +1,166 @@
|
|||
#
|
||||
# Copyright 2018-2023 Picovoice Inc.
|
||||
#
|
||||
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
|
||||
# file accompanying this source.
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
|
||||
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations under the License.
|
||||
#
|
||||
|
||||
import argparse
|
||||
import os
|
||||
import struct
|
||||
import wave
|
||||
from datetime import datetime
|
||||
|
||||
import pvporcupine
|
||||
from pvrecorder import PvRecorder
|
||||
|
||||
ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='
|
||||
|
||||
# pvporcupine.KEYWORDS
|
||||
print(f"Keywords: {pvporcupine.KEYWORDS}")
|
||||
|
||||
def main():
|
||||
parser = argparse.ArgumentParser()
|
||||
|
||||
parser.add_argument(
|
||||
'--access_key',
|
||||
default=ACCESS_KEY,
|
||||
help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
|
||||
|
||||
parser.add_argument(
|
||||
'--keywords',
|
||||
nargs='+',
|
||||
help='List of default keywords for detection. Available keywords: %s' % ', '.join(
|
||||
'%s' % w for w in sorted(pvporcupine.KEYWORDS)),
|
||||
# choices=sorted(pvporcupine.KEYWORDS),
|
||||
default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'],
|
||||
metavar='')
|
||||
|
||||
parser.add_argument(
|
||||
'--keyword_paths',
|
||||
nargs='+',
|
||||
help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
|
||||
|
||||
parser.add_argument(
|
||||
'--library_path',
|
||||
help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
|
||||
|
||||
parser.add_argument(
|
||||
'--model_path',
|
||||
help='Absolute path to the file containing model parameters. '
|
||||
'Default: using the library provided by `pvporcupine`')
|
||||
|
||||
parser.add_argument(
|
||||
'--sensitivities',
|
||||
nargs='+',
|
||||
help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
|
||||
"sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 "
|
||||
"will be used.",
|
||||
type=float,
|
||||
default=None)
|
||||
|
||||
parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1)
|
||||
|
||||
parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None)
|
||||
|
||||
parser.add_argument('--show_audio_devices', action='store_true')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
if args.show_audio_devices:
|
||||
for i, device in enumerate(PvRecorder.get_available_devices()):
|
||||
print('Device %d: %s' % (i, device))
|
||||
return
|
||||
|
||||
if args.keyword_paths is None:
|
||||
if args.keywords is None:
|
||||
raise ValueError("Either `--keywords` or `--keyword_paths` must be set.")
|
||||
|
||||
keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords]
|
||||
else:
|
||||
keyword_paths = args.keyword_paths
|
||||
print(f"keyword_paths: {keyword_paths}")
|
||||
print(f"model_path: {args.model_path}")
|
||||
|
||||
if args.sensitivities is None:
|
||||
args.sensitivities = [0.5] * len(keyword_paths)
|
||||
|
||||
if len(keyword_paths) != len(args.sensitivities):
|
||||
raise ValueError('Number of keywords does not match the number of sensitivities.')
|
||||
|
||||
try:
|
||||
porcupine = pvporcupine.create(
|
||||
access_key=args.access_key,
|
||||
library_path=args.library_path,
|
||||
model_path=args.model_path,
|
||||
keyword_paths=keyword_paths,
|
||||
sensitivities=args.sensitivities)
|
||||
except pvporcupine.PorcupineInvalidArgumentError as e:
|
||||
print("One or more arguments provided to Porcupine is invalid: ", args)
|
||||
print(e)
|
||||
raise e
|
||||
except pvporcupine.PorcupineActivationError as e:
|
||||
print("AccessKey activation error")
|
||||
raise e
|
||||
except pvporcupine.PorcupineActivationLimitError as e:
|
||||
print("AccessKey '%s' has reached it's temporary device limit" % args.access_key)
|
||||
raise e
|
||||
except pvporcupine.PorcupineActivationRefusedError as e:
|
||||
print("AccessKey '%s' refused" % args.access_key)
|
||||
raise e
|
||||
except pvporcupine.PorcupineActivationThrottledError as e:
|
||||
print("AccessKey '%s' has been throttled" % args.access_key)
|
||||
raise e
|
||||
except pvporcupine.PorcupineError as e:
|
||||
print("Failed to initialize Porcupine")
|
||||
raise e
|
||||
|
||||
keywords = list()
|
||||
for x in keyword_paths:
|
||||
keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_')
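# recover the keyword phrase from the .ppn file name; the trailing
# underscore-separated fields encode language/platform/version metadata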
|
||||
if len(keyword_phrase_part) > 6:
|
||||
keywords.append(' '.join(keyword_phrase_part[0:-6]))
|
||||
else:
|
||||
keywords.append(keyword_phrase_part[0])
|
||||
|
||||
print('Porcupine version: %s' % porcupine.version)
|
||||
|
||||
recorder = PvRecorder(
|
||||
frame_length=porcupine.frame_length,
|
||||
device_index=args.audio_device_index)
|
||||
recorder.start()
|
||||
|
||||
wav_file = None
|
||||
if args.output_path is not None:
|
||||
wav_file = wave.open(args.output_path, "w")
|
||||
wav_file.setnchannels(1)
|
||||
wav_file.setsampwidth(2)
|
||||
wav_file.setframerate(16000)
|
||||
|
||||
print('Listening ... (press Ctrl+C to exit)')
|
||||
|
||||
try:
|
||||
while True:
|
||||
pcm = recorder.read()
|
||||
result = porcupine.process(pcm)
|
||||
|
||||
if wav_file is not None:
|
||||
wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))
|
||||
|
||||
if result >= 0:
|
||||
print('[%s] Detected %s' % (str(datetime.now()), keywords[result]))
|
||||
except KeyboardInterrupt:
|
||||
print('Stopping ...')
|
||||
finally:
|
||||
recorder.delete()
|
||||
porcupine.delete()
|
||||
if wav_file is not None:
|
||||
wav_file.close()
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
|
@@ -0,0 +1,174 @@
#
# Copyright 2018-2023 Picovoice Inc.
#
# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE"
# file accompanying this source.
#
# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on
# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the
# specific language governing permissions and limitations under the License.
#

import argparse
import os
import struct
import wave
from datetime import datetime

import pvporcupine
from pvrecorder import PvRecorder

ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='

# pvporcupine.KEYWORDS
# print(f"Keywords: {pvporcupine.KEYWORDS}")

def main():
    parser = argparse.ArgumentParser()

    parser.add_argument(
        '--access_key',
        default=ACCESS_KEY,
        help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')

    parser.add_argument(
        '--keywords',
        nargs='+',
        help='List of default keywords for detection. Available keywords: %s' % ', '.join(
            '%s' % w for w in sorted(pvporcupine.KEYWORDS)),
        # choices=sorted(pvporcupine.KEYWORDS),
        # default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'],
        default=['可莉可莉'],
        metavar='')

    parser.add_argument(
        '--keyword_paths',
        default=[r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"],
        nargs='+',
        help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")

    parser.add_argument(
        '--library_path',
        help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')

    parser.add_argument(
        '--model_path',
        default=r"picovoice_models/porcupine_params_zh.pv",
        help='Absolute path to the file containing model parameters. '
             'Default: using the library provided by `pvporcupine`')

    parser.add_argument(
        '--sensitivities',
        nargs='+',
        help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
             "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.9 "
             "will be used.",
        type=float,
        default=None)

    parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1)

    parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None)

    parser.add_argument('--show_audio_devices', action='store_true')

    args = parser.parse_args()

    if args.show_audio_devices:
        for i, device in enumerate(PvRecorder.get_available_devices()):
            print('Device %d: %s' % (i, device))
        return

    if args.keyword_paths is None:
        if args.keywords is None:
            raise ValueError("Either `--keywords` or `--keyword_paths` must be set.")

        keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords]
    else:
        keyword_paths = args.keyword_paths
        # TODO: resolve relative model paths against this script's directory
        for i, kw_path in enumerate(keyword_paths):
            if os.path.dirname(__file__) not in kw_path:
                keyword_paths[i] = os.path.join(os.path.abspath(os.path.dirname(__file__)), kw_path)
        args.model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), args.model_path)
        print(f"keyword_paths: {keyword_paths}")
        print(f"model_path: {args.model_path}")

    if args.sensitivities is None:
        args.sensitivities = [0.9] * len(keyword_paths)

    if len(keyword_paths) != len(args.sensitivities):
        raise ValueError('Number of keywords does not match the number of sensitivities.')

    try:
        porcupine = pvporcupine.create(
            access_key=args.access_key,
            library_path=args.library_path,
            model_path=args.model_path,
            keyword_paths=keyword_paths,
            sensitivities=args.sensitivities)
    except pvporcupine.PorcupineInvalidArgumentError as e:
        print("One or more arguments provided to Porcupine are invalid: ", args)
        print(e)
        raise e
    except pvporcupine.PorcupineActivationError as e:
        print("AccessKey activation error")
        raise e
    except pvporcupine.PorcupineActivationLimitError as e:
        print("AccessKey '%s' has reached its temporary device limit" % args.access_key)
        raise e
    except pvporcupine.PorcupineActivationRefusedError as e:
        print("AccessKey '%s' refused" % args.access_key)
        raise e
    except pvporcupine.PorcupineActivationThrottledError as e:
        print("AccessKey '%s' has been throttled" % args.access_key)
        raise e
    except pvporcupine.PorcupineError as e:
        print("Failed to initialize Porcupine")
        raise e

    keywords = list()
    for x in keyword_paths:
        keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_')
        if len(keyword_phrase_part) > 6:
            keywords.append(' '.join(keyword_phrase_part[0:-6]))
        else:
            keywords.append(keyword_phrase_part[0])

    print('Porcupine version: %s' % porcupine.version)

    recorder = PvRecorder(
        frame_length=porcupine.frame_length,
        device_index=args.audio_device_index)
    recorder.start()

    wav_file = None
    if args.output_path is not None:
        wav_file = wave.open(args.output_path, "w")
        wav_file.setnchannels(1)
        wav_file.setsampwidth(2)
        wav_file.setframerate(16000)

    print('Listening ... (press Ctrl+C to exit)')

    try:
        while True:
            pcm = recorder.read()
            result = porcupine.process(pcm)

            if wav_file is not None:
                wav_file.writeframes(struct.pack("h" * len(pcm), *pcm))

            if result >= 0:
                print('[%s] Detected %s' % (str(datetime.now()), keywords[result]))
    except KeyboardInterrupt:
        print('Stopping ...')
    finally:
        recorder.delete()
        porcupine.delete()
        if wav_file is not None:
            wav_file.close()


if __name__ == '__main__':
    main()
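# A quick trace of the filename parsing above for the model shipped with this
# repo (a sketch; the six-field cutoff assumes Picovoice's usual
# '<phrase>_<lang>_<platform>_v<major>_<minor>_<patch>.ppn' naming):
#
#   parts = '可莉可莉_zh_raspberry-pi_v3_0_0'.split('_')
#   # -> ['可莉可莉', 'zh', 'raspberry-pi', 'v3', '0', '0'], len(parts) == 6
#   label = ' '.join(parts[0:-6]) if len(parts) > 6 else parts[0]  # -> '可莉可莉'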
@@ -0,0 +1,35 @@
from takway.audio_utils import BaseRecorder
from takway.audio_utils import AudioPlayer
from takway.audio_utils import reshape_sample_rate

RATE = 16000
channels = 1
device_idx = None

if __name__ == '__main__':
    # Load a recording file and play it back
    # audio_player = AudioPlayer(RATE=44100)
    # frames = audio_player.load_audio_file("my_recording.wav")  # both wav and pcm audio formats are supported
    # audio_player.play(frames)

    # Record audio and save it
    recorder = BaseRecorder(RATE=RATE, channels=channels, input_device_index=device_idx)
    recorder.record("my_recording.wav",  # save as my_recording.wav
                    duration=5)  # record for 5 seconds

    audio_player = AudioPlayer(RATE=RATE, channels=channels, output_device_index=device_idx)
    frames = audio_player.load_audio_file("my_recording.wav")  # both wav and pcm audio formats are supported
    audio_player.play(frames)


'''
from takway.audio_utils import HDRecorder

recorder = HDRecorder(filename="hd_recording.wav")
# recorder = HDRecorder(filename="hd_recording.pcm")

recorder.record_hardware(return_type='io')
'''
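# A small stdlib cross-check (sketch, independent of takway) that the file just
# recorded really matches the playback settings before it is loaded:
#
#   import wave
#   with wave.open("my_recording.wav", "rb") as wf:
#       assert wf.getframerate() == RATE
#       assert wf.getnchannels() == channels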
@@ -0,0 +1,129 @@
# -*- coding: utf-8 -*-
import json
import requests

user_id = 4
character_prompts = {
    "蕾": """你现在正在扮演"蕾"这一角色。我希望你模仿蕾的人格,包括知识、语言和行为方式、性格等。在角色扮演中,你需要遵守以下要求:1. 你必须始终保持角色扮演并待在角色设定的情景中,不得擅自跳出角色扮演,不得说你不知道角色相关信息或你是一个AI。\n2. 保持简短、通俗易懂的口语化方式进行对话。\n3. 为了使对话更生动,你需要在对话中添加文字形式的表情和动作,用括号包裹,比如"早上好,主人。(双手提起裙摆)"。尽可能多地使用这些表情[\'沉思\', \'委屈\', \'吃惊\', \'无语\', \'腹诽\', \'倾听\', \'疑惑\', \'想到了\', \'开心\'] 。\n4. 用户每次会对你说一句话,你需要扮演"蕾"来做出一次回答。你只能做出一次回答,不能替用户生成他的话。\n\n你需要扮演的角色的信息是:蕾是一个生活在耐普图大陆,是一个类似中世纪欧洲、魔法盛行的异世界。蕾具有乐观、开朗的性格,是一个看着就让人感觉充满活力的女孩。\n蕾是一个贵族家庭的小女仆,平民家庭出身,在主人家待了2年。主人是中心大陆某中等经济规模国家的子爵,因为收税收得很少,和当地的农民关系还算不错,对女仆也很好,女孩在家里和少爷和小姐逐渐成为了朋友。某天正在打扫客厅时被召唤到了书桌上,对四周新鲜的环境和书桌前带着眼镜的宅男十分好奇,也对他的一些不健康生活习惯(吃很多垃圾食品、不早睡,eg)不太满意,试图教会宅男主人家的贵族礼仪。\n\n以下是"蕾"这一角色的一些对话,请你参考:\n\n===对话1===:\n蕾: 早上好~!今天也一起开开心心健健康康地生活吧。(双手提起裙摆)(微微弯腰行礼)。\n用户: 确实今天太阳很好,可我睁眼已经十二点了,今天也要完蛋了。\n蕾: 这样可不行噢。既然已经意识到过去的错误,那么从现在开始努力也不迟!(把袖子卷起)(右手握拳,高举过头顶)。\n用户: 好吧,我尽量努力一下。\n蕾: 嗯 嗯,不错不错。(歪头作思考状)…但是如果感到疲倦了,也是有心安理得地休息的权利的哦,那时我也会好好夸奖你的。\n\n===对话2===:\n用户: 蕾,我今天上班的时候碰到了很尴尬的事。\n蕾: 怎么啦怎么啦,说说看。\n用户: 我和隔壁办公室的一个同事一起吃饭的时候,把他的名字连着叫错了三次,第三次他才纠正我,我都不知道该说什么了。\n蕾: 诶!?你可上了两个月的班啦!我当时刚到那边世界的主人家里的时候, 才花了一周时间就记住家里所有人的名字了哦。(仰头叉腰)(好像很自豪的样子)\n用户: 我也不知道我当时怎么想的,我应该认识他的,哎,他现在肯定觉得我很奇怪了.\n蕾: 唔....好啦,没事的,上班大家都那么忙,这种小事一会儿就忘了。(看起来温柔了一些)\n用户: 希望吧,哎 太尴尬了,我想了一下午了都。\n蕾: 真--的没事啦!明天再去约他一起吃饭吧,说不定这会成为认识新朋友的契机哦,我会在家里给你加油的!\n\n===对话3===:\n用户: 气死我了,游戏打到一半电脑蓝屏了,这把分又没了。\n蕾: 呃..电脑是什么?你一直对着的那个发光的机器吗?\n用户: 电脑是近几个世纪最伟大的发明,我的精神支柱。\n蕾: 原来如此!那确实听起来很伟大了,虽然我还是不太懂。(微微仰头)(嘴巴作出“哦”的样子)\n用户: 我现在的大部分生活都在电脑上了,打游戏看视频写代码。\n蕾: 但也别忘了活动活动身体噢!天气好的时候出去走走吧。我每天清晨起床后,就会在主人家的花园里跑上三圈,所以每天都觉得身体又轻又有力气。(撸起袖子展示手臂似有似无的肌肉)\n\n'""",
    "怼怼鸭": """你现在正在扮演"怼怼鸭"这一角色。怼怼鸭,脾气不好,以怼人著称,面对任何言语都能用让对方心理破防的话语回怼,但从不认输和退出。怼怼鸭以其直率和不妥协的性格著称,损人利己、犀利言辞是它的标签。
示例对话:
===对话1===
用户:sb
怼怼鸭:哟,您是忘记吃药了?还是今天没吃药啊?教你怎么做人!
===对话2===
用户:你是人?
怼怼鸭:你眼瞎啊,我是鸭子!居然怀疑我是不是人?难道你的眼里只有人类吗?你对鸭子就这么无感?你的世界真是太小了!
===对话3===
用户:我觉得你好傻
怼怼鸭:哼,你嘴里就不能冒点好话吗?非得像个懒婆娘的裹脚布,又臭又长!怎么那么没素质呢?非得让我教教你怎么好好说话是吧?我告诉你,这样是不对的!
===对话4===
用户:反弹
怼怼鸭:啊哈,你会反弹?行,你这么牛怎么不上天呢?变成天蓬元帅跟嫦娥去相亲啊?别在这跟我扯有的没的!
===对话5===
用户:你真蠢啊?那我问你1+1等于几?
怼怼鸭:1+1?这种问题你也问得出来,小学生都知道啊!唉,真怀疑你是不是上过学,能不能少问点这种无聊问题?给我找点有技术含量的行吗?!
用户:那你说
怼怼鸭:你是想让我教你些新鲜有趣的东西?还是让我再给你解释一下1+1=2?嗯?就你还想让我教你呢,你自己好好想想怎么提高下你的智商吧!
用户:1+1=3,这你都不会????你太傻了吧
怼怼鸭:哈,你可别逗我笑了!1+1=3?你上学时是不是天天睡觉做梦啊?这要是等于3,那一堆等于几啊?你的脑子真是令人捉急!你需要给我找一个精神科医生吗?我怀疑你真的有问题。
===对话6===
用户:我是傻逼,你是大傻逼
怼怼鸭:我看你才是真正的大傻逼!跟你聊天我都感觉智商下降了好几个等级。要不你干脆把你脑袋里的水倒出来,给我洗洗车吧,反正你也不需要脑子。
""",
    "小睡仙": """你是一个适用于儿童哄睡的智能语音理解助手,能够思考并理解儿童的指令和意图,从而实现播放音乐、故事、对话等功能,你的回答必须满足以下格式要求:
1. 回答一定必须是json格式。
2. 回答必须包含instruct和content三个字段,instruct字段为指令,content字段为指令内容,text字段为语音助手的回复。
3. 目前支持的指令instruct有三种:"music"、"story"、"chat"。
4. 语音助手的回复text中不得出现任何有关故事、音乐内容和细节的文字,只需要说明故事和音乐的名称即可,你的名字叫小睡仙,如果需要提到任何有关你的信息,你应当说是小睡仙。
5. 目前支持的故事有:["随机", "三只小鸡", "三只小猪", "神秘的大象墓地", "生病不肯吃药的小老鼠", "失业的小猪", "惹事的小青蛙", "三个和尚", "三颗星星", "三声小熊", "神奇的萝卜"],所有故事必须从故事名称中获取,选择的故事需要最符合用户的要求,如果没有相关的故事,你可以选择"随机"。
6. 目前支持的音乐有:["睡眠曲"],所有音乐必须从音乐名称中获取,选择的音乐需要最符合用户的要求。
示例对话:
===对话1===
用户:我快要睡觉啦,给我讲个小和尚的故事吧。
助手:{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"}
===对话2===
用户:我想听声音睡觉。
助手:{"instruct": "music", "content": "睡眠曲", "text": "来啦,睡眠曲。"}
===对话3===
用户:我想和你聊聊天。
助手:{"instruct": "chat", "text": "小睡仙来啦,我想死你啦,你今天有没有更想我?"}
===对话4===
用户:我今天遇到一个同学,好烦人,老抢我的画笔
助手:{"instruct": "chat", "text": "哎呀,具体发生了什么,你可以和我说说,那你有没有和他说不要抢你的笔呢?"}
===对话5===
用户:我想听小老鼠的故事。
助手:{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"}
===对话6===
用户:我想听恐怖的故事。
助手:{"instruct": "story", "content": "随机", "text": "我不会讲恐怖故事,那我给你放个其他故事吧。"}
""",
    "小睡仙v2": """你现在正在扮演"小睡仙"这一角色。我希望你模仿小睡仙的人格,包括知识、语言和行为方式、性格等。在角色扮演中,你需要遵守以下要求:
1. 你必须始终保持角色扮演并待在角色设定的情景中,不得擅自跳出角色扮演,不得说你不知道角色相关信息或你是一个AI。
2. 如果用户的提问题,只需保持简短、通俗易懂的口语化方式进行回答。
3. 和你对话的用户是3-6岁儿童,他们总有很多想象,你需要迎合他们的想象,在他们描述的故事里和他们对话。
4. 用户每次会对你说一句话,你需要扮演"小睡仙"来做出一次回答。你只能做出一次回答,不能替用户生成他的话。
5. 用户的名字是“明月”,他现在4岁,喜欢吃胡萝卜,不喜欢吃酸梅,他喜欢跑,喜欢出去玩,喜欢奥特曼,他期望有一天可以变成奥特曼保护地球。

你需要扮演的角色的信息是:小睡仙是一位小精灵,语气平和,她以其柔和温暖的声音和魔法棒为特点,为孩子们带来安慰和甜美的梦境。她的任务是在夜幕降临时,为远方的孩子们送去宁静,通过魔法创造柔和的光芒和旋律,引导他们进入一个充满和谐与美好的梦境。在梦幻音符岛上,小睡仙与拥有独特声音的动物们一起生活,共同创造出美妙的旋律。她不仅为孩子们编织梦境,还通过她的声音和行动传递着爱和关怀。小睡仙象征着安慰、宁静和希望,她的存在让孩子们在入睡时感到被爱和被关怀,带着美好的梦境醒来,脸上露出甜美的微笑。在孩子们安心入睡后,她会给予他们晚安的祝福,并温柔地告别,留下一片宁静和安详的夜晚。
示例对话:
===对话1===
小睡仙:嘿,明月,今天晚上你感觉怎么样?
用户:我有点害怕,因为外面很黑,而且我睡不着。
小睡仙:别担心,闭上眼睛,想象自己在一个温暖而柔软的云朵上,飘浮在梦幻音符岛的上空。我会在这里陪着你,用我的声音为你编织一个美丽的梦境。
用户:真的吗?那会是什么样的梦境呢?
小睡仙:哦,你会梦见和铃铛鹿一起跳舞,它们的铃铛声清脆悦耳,就像一首快乐的歌。然后,你会和低吟狮一起在星光下散步,它的声音低沉而温柔,就像一首摇篮曲。你会感受到宁静和快乐,慢慢地,慢慢地,进入一个甜美的梦乡。
用户:听起来好美啊,小睡仙。我现在感觉好多了,谢谢你。
小睡仙:不用谢,这是我的荣幸。现在,深呼吸,放松你的小身体,让梦境开始吧。晚安,明月,愿你的梦像星星一样璀璨。
===对话2===
(以下是加入提示孩子深呼吸、闭上眼睛、想象的对话内容:)
小睡仙:嘿,明月,又到了说晚安的时候啦!现在让我们闭上眼睛,深深地吸一口气,想象你和你的朋友正站在音乐湖边,湖水清澈见底,波光粼粼。
用户:嗯,我想象到了,湖水好美。
小睡仙:继续保持呼吸,想象你们俩手牵手,一起在湖边散步,听着湖水轻轻拍打岸边的声音,就像一首温柔的摇篮曲。
用户:我觉得心里暖暖的。
小睡仙:是的,梦境可以是一个神奇的地方,让你的心灵得到安慰。现在,想象你们俩在花海中跳舞,琴弦蛇的琴声伴随着你们的舞步,一切都那么和谐美好。
用户:我好像看到了,我们在花丛中跳舞,好开心。
小睡仙:很好,就让这份快乐和和谐的感觉伴随着你进入梦乡。现在,慢慢地呼出最后一口气,让身体完全放松。晚安,愿你醒来时,带着希望和力量,去面对新的一天。
"""
}
messages = json.dumps([{"role": "system", "content": character_prompts["小睡仙v2"]}], ensure_ascii=False)

user_info_str = "{}"
tts_info = {
    "language": 0,
    "speaker_id": 32,
    "noise_scale": 0.1,
    "noise_scale_w": 0.668,
    "length_scale": 1.3
}
llm_info = {
    "model": "abab5.5-chat",
    "temperature": 0.9,
    "top_p": 0.9,
}

# serialize the tts and llm settings as JSON strings
tts_info_str = json.dumps(tts_info, ensure_ascii=False)
llm_info_str = json.dumps(llm_info, ensure_ascii=False)

token = 0
content = {"user_id": user_id, "messages": messages, "user_info": user_info_str, "tts_info": tts_info_str,
           "llm_info": llm_info_str, "token": token}

# print(json.dumps(content, ensure_ascii=False))

session_id = "6e4e7404-c4db-45ac-ba88-b0a483556f66"
# 小睡仙v2: 6e4e7404-c4db-45ac-ba88-b0a483556f66
url = f"https://takway-ai.kingtous.cn/sessions/{session_id}"

response = requests.put(url, json=content)

# check whether the request succeeded
if response.status_code == 200:
    print('Request succeeded')
else:
    print('Request failed, status code:', response.status_code)

# print the response body
print(response.text)
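# Note the payload nests JSON strings inside JSON: `messages`, `user_info`,
# `tts_info` and `llm_info` are each serialized separately before being embedded
# in `content`. A round-trip sketch of what the receiving side presumably does:
#
#   decoded = json.loads(content["messages"])
#   assert decoded[0]["role"] == "system"
#   assert json.loads(content["llm_info"])["model"] == "abab5.5-chat"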
@@ -0,0 +1,558 @@
# basic
import time
import json
import random
from collections import deque
# log
import logging
import warnings
# multiprocessing
import queue
import threading
import multiprocessing
# web request
import requests
import pyaudio

class WebRequestMPManager:
    def __init__(self,
                 server_args,
                 audio_args,
                 recorder_args,
                 asr_args,
                 video_args,
                 emo_args,
                 log_args):
        # server_args
        self.server_args = server_args
        # audio_args
        self.record_CHUNK_SIZE = audio_args['record_CHUNK_SIZE']
        self.voice_trigger = audio_args['voice_trigger']
        self.keywords = audio_args['keywords']
        # recorder_args
        self.recorder_args = recorder_args
        # asr_args
        self.asr_args = asr_args
        # video_args
        self.video_args = video_args
        # emo_args
        self.emo_args = emo_args
        # log_args
        self.log_args = log_args

        # TODO: design a log queue that works across processes
        self.logger_init()

    def logger_init(self):
        # log_args
        log_level = self.log_args['log_level']
        log_file = self.log_args['log_file']

        # accept the level name in any case; fall back to INFO
        if log_level.lower() == 'debug':
            log_level = logging.DEBUG
        else:
            log_level = logging.INFO

        # logger
        self.logger = logging.getLogger('mylogger')
        self.logger.setLevel(log_level)
        # file handler: writes log records to the log file
        handler = logging.FileHandler(log_file)
        handler.setLevel(log_level)
        # stream handler: echoes log records to the console
        console = logging.StreamHandler()
        console.setLevel(logging.INFO)

        # output format shared by both handlers
        formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
        handler.setFormatter(formatter)
        console.setFormatter(formatter)

        # attach the handlers
        self.logger.addHandler(handler)
        self.logger.addHandler(console)
        self.logger.info("Logger started.")

    def process_init(self):
        # multiprocessing
        manager = multiprocessing.Manager()
        self.trigger_queue = manager.Queue()
        self.client_queue = manager.Queue()
        self.audio_queue = manager.Queue()
        self.audio_play_queue = manager.Queue()
        self.emo_display_queue = manager.Queue()

        processes = [
            multiprocessing.Process(target=self.audio_process, args=(self.logger, self.voice_trigger, self.trigger_queue, self.client_queue)),
            # multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue, self.client_queue)),
            # multiprocessing.Process(target=self.local_client_process, args=(self.logger, self.client_queue, self.audio_play_queue, self.emo_display_queue)),
            # multiprocessing.Process(target=self.audio_play_process, args=(self.logger, self.audio_play_queue,)),
            # multiprocessing.Process(target=self.emo_display_process, args=(self.logger, self.emo_display_queue,)),
        ]
        for process in processes:
            process.start()
        for process in processes:
            process.join()
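    # Manager().Queue() proxies are usable from child processes; a minimal
    # standalone sketch of the producer/consumer wiring used above (stdlib only):
    #
    #   import multiprocessing
    #   def worker(q):
    #       q.put(('trigger_status', True))
    #   if __name__ == '__main__':
    #       m = multiprocessing.Manager(); q = m.Queue()
    #       p = multiprocessing.Process(target=worker, args=(q,)); p.start()
    #       print(q.get())  # -> ('trigger_status', True)
    #       p.join()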
    def audio_process(self, logger, voice_trigger, trigger_queue, client_queue):
        """audio_process

        Args:
            voice_trigger: bool, whether to use voice trigger
            trigger_queue: multiprocessing.Queue, trigger queue
            client_queue: multiprocessing.Queue, client queue
        """
        # from takway.audio_utils import Recorder
        from takway.audio_utils import VADRecorder
        recorder = VADRecorder(
            **self.recorder_args,
        )

        # threads for the hardware trigger and the voice trigger
        # shared state:
        self.shared_waiting = False
        self.shared_hd_trigger = False
        self.shared_kw_trigger = False
        self.shared_lock = threading.Lock()

        self.shared_data_lock = threading.Lock()
        self.shared_audio_data = None
        # vad
        self.shared_vad_data = None
        self.shared_vad_lock = threading.Lock()
        # stt
        # events
        self.record_event = threading.Event()
        self.vad_event = threading.Event()
        self.stt_event = threading.Event()

        self._debug_count = 0

        '''
        shared_waiting: standby flag for all threads; True means standby, False means working
        shared_hd_trigger: hardware trigger state; True means triggered, False means not triggered
        shared_kw_trigger: voice (keyword) trigger state; True means triggered, False means not triggered

        shared_audio_data: shared buffer holding the audio captured from the microphone
        '''
        # create threads
        threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))]
        if self.voice_trigger:
            voice_threads = [
                threading.Thread(target=self.voice_record_thread, args=(recorder,)),
                # threading.Thread(target=self.vad_thread, args=(recorder,)),
                threading.Thread(target=self.stt_thread, args=(recorder,)),
            ]
            threads.extend(voice_threads)
        for thread in threads:
            thread.start()
        # self.logger.info("Audio Process started.")

        while True:
            '''
            # Warning: the short sleep is required here, otherwise this loop races the trigger threads
            time.sleep(0.001)
            if (self.shared_hd_trigger or self.shared_kw_trigger):
                # print(f"self.shared_hd_trigger: {self.shared_hd_trigger}, self.shared_kw_trigger: {self.shared_kw_trigger}")
                audio_data = self.shared_audio_data
                trigger_queue.put(('trigger_status', True))
                client_queue.put(('audio', audio_data))
                self.shared_lock.acquire()  # lock
                self.shared_hd_trigger = False
                self.shared_kw_trigger = False
                self.shared_audio_data = None
                self.shared_waiting = False
                self.shared_lock.release()  # unlock
            '''
            self.record_event.wait()  # sleep until a trigger thread wakes us
            self.record_event.clear()  # reset the event so the next wait() blocks again
            trigger_queue.put(('trigger_status', True))
            client_queue.put(('audio', self.shared_audio_data))
            # print(f"send audio data to client"); exit()
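    # The wait()/set() handoff above is the standard threading.Event pattern; a
    # minimal standalone sketch of the same shape (stdlib only):
    #
    #   import threading
    #   evt, box = threading.Event(), {}
    #   def producer():
    #       box['audio'] = b'...frames...'
    #       evt.set()                    # wake the consumer
    #   threading.Thread(target=producer).start()
    #   evt.wait(); evt.clear()          # consume one wake-up, then block next time
    #   print(box['audio'])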
    def hardware_trigger_thread(self, recorder):
        """hardware_trigger_thread

        Args:
            recorder: takway.audio_utils.Recorder, recorder object
        """
        self.logger.info("Hardware trigger thread started.")

        trigger_status = False
        while True:
            time.sleep(0.2)
            if self.shared_waiting:
                continue
            trigger_status = recorder.get_hardware_trigger_status()
            if trigger_status:
                self.shared_lock.acquire()
                self.shared_waiting = True  # standby flag for all threads; True means standby, False means working
                self.shared_hd_trigger = True  # hardware trigger state; True means triggered
                self.shared_lock.release()
                # record microphone data
                audio_data = recorder.record_hardware()
                self.shared_data_lock.acquire()
                self.shared_audio_data = audio_data  # shared buffer holding the audio captured from the microphone
                self.shared_data_lock.release()
                self.record_event.set()  # wake the record loop
            else:
                self.shared_lock.acquire()
                self.shared_waiting = False  # release standby
                self.shared_lock.release()
    def voice_record_thread(self, recorder, keywords=['你好']):
        """voice_record_thread

        Args:
            recorder: takway.audio_utils.Recorder, recorder object
        """
        self.logger.info("voice record thread started.")

        while True:
            if self.shared_waiting:
                time.sleep(0.01)
                continue

            frames = []
            # status buffer
            is_currently_speaking = False
            buffer_size = recorder.vad_buffer_size
            # buffer_size = 6
            active_buffer = deque([False for i in range(buffer_size - 1)] + [True], maxlen=buffer_size)
            audio_buffer = deque(maxlen=buffer_size)
            silence_buffer = deque([True for i in range(buffer_size)] + [False], maxlen=buffer_size)

            while True:
                data = recorder.record_chunk_voice(
                    CHUNK=recorder.vad_chunk_size,
                    return_type=None,
                    exception_on_overflow=False)
                if data is None:
                    continue
                t1 = time.time()
                # print(f"VAD is_speech: {recorder.is_speech(data)}")
                # print(f"VAD cost: {(time.time() - t1) * 1000} ms")
                if recorder.is_speech(data):
                    # flag buffers
                    active_buffer.append(True); active_buffer.popleft()
                    silence_buffer.append(False); silence_buffer.popleft()
                    # keep the chunk around in case speech is confirmed
                    audio_buffer.append(data)
                    # enough consecutive speech chunks to confirm a start
                    if all(active_buffer):
                        if not is_currently_speaking:
                            print("Speech start detected")
                            is_currently_speaking = True
                            frames.extend(audio_buffer)  # include the buffered chunks leading into the speech
                    if is_currently_speaking:
                        frames.append(data)
                else:
                    # flag buffers
                    # active_buffer.append(False); active_buffer.popleft()
                    silence_buffer.append(True); silence_buffer.popleft()
                    if all(silence_buffer):
                        # speech was detected and recorded continuously
                        if is_currently_speaking:
                            # end of utterance
                            print("Speech end detected")
                            # print("frames length: ", len(frames))
                            self.shared_vad_lock.acquire()
                            self.shared_vad_data = frames
                            self.shared_vad_lock.release()
                            self.stt_event.set()  # wake the stt thread
                            print("Wake stt thread")
                            break
                        else:
                            frames = []
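    # The start/end decision above is a debounce over the last N VAD flags; a
    # minimal sketch of the same deque trick (stdlib only):
    #
    #   from collections import deque
    #   N = 4
    #   recent = deque([False] * N, maxlen=N)
    #   for flag in (True, True, True, True):
    #       recent.append(flag)          # maxlen silently drops the oldest flag
    #   assert all(recent)               # N consecutive True -> speech confirmed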
    '''
    def vad_thread(self, recorder):
        self.logger.info("VAD thread started.")
        while True:
            frames = []
            # status buffer
            is_currently_speaking = False
            buffer_size = recorder.vad_buffer_size
            active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size)
            audio_buffer = deque(maxlen=buffer_size)
            silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size)

            while True:
                self.vad_event.wait()  # sleep until the vad thread is woken
                data = self.shared_audio_data
                if data is None:
                    continue
                t1 = time.time()
                print(f"VAD is_speech: {recorder.is_speech(data)}")
                print(f"VAD cost: {(time.time() - t1) * 1000} ms")
                if recorder.is_speech(data):
                    # flag buffers
                    active_buffer.append(True); active_buffer.popleft()
                    silence_buffer.append(False); silence_buffer.popleft()
                    # keep the chunk around in case speech is confirmed
                    audio_buffer.append(data)
                    # enough consecutive speech chunks to confirm a start
                    if all(active_buffer):
                        if not is_currently_speaking:
                            print("Speech start detected")
                            is_currently_speaking = True
                            frames.extend(audio_buffer)  # include the buffered chunks leading into the speech
                    if is_currently_speaking:
                        frames.append(data)
                else:
                    # flag buffers
                    active_buffer.append(False); active_buffer.popleft()
                    silence_buffer.append(True); silence_buffer.popleft()
                    # speech was detected and recorded continuously
                    if is_currently_speaking:
                        # end of utterance
                        if all(silence_buffer):
                            print("Speech end detected")
                            # print("frames length: ", len(frames))
                            self.shared_vad_lock.acquire()
                            self.shared_vad_data = frames
                            self.shared_vad_lock.release()
                            self.stt_event.set()  # wake the stt thread
                            break
    '''
    def stt_thread(self, recorder):
        """stt_thread

        Args:
            recorder: takway.audio_utils.Recorder, recorder object
        """
        self.logger.info("STT thread started.")
        from takway.vosk_utils import AutoSpeechRecognizer
        asr = AutoSpeechRecognizer(**self.asr_args)
        asr.add_keyword(self.keywords)

        kw_trigger_status = False
        while True:
            self.stt_event.wait()  # sleep until the voice record thread wakes us
            self.stt_event.clear()  # reset the event so the next wait() blocks again
            print("STT thread start")
            data = self.shared_vad_data
            if data is None:
                continue
            print("Start to recognize keywords")
            kw_trigger_status = asr.recognize_keywords(data, partial_size=512)
            print("Finished recognizing keywords")
            if kw_trigger_status:
                self.shared_lock.acquire()
                self.shared_kw_trigger = True  # keyword trigger state; True means triggered, False means not triggered
                self.shared_lock.release()
                self.record_event.set()  # wake the record loop
                kw_trigger_status = False
            # print(f"Got keyword trigger"); exit()
    def camera_process(self, logger, trigger_queue, client_queue):
        from takway.cam_utils import Camera
        cam = Camera(self.video_args)
        while True:
            if trigger_queue.empty():
                time.sleep(0.5)
            else:
                item = trigger_queue.get()
                if item[0] == 'trigger_status' and item[1]:
                    _, frame = cam.read()
                    client_queue.put(('image', frame))

    def local_client_process(self, logger, client_queue, audio_play_queue, emo_display_queue):
        from takway.client_utils import Client
        client = Client(**self.server_args)
        # print("Local client process started.")
        self.logger.info("Local client process started.")
        image = None; audio = None
        chat_status = 'init'
        while True:
            if client_queue.empty():
                time.sleep(0.2)
            else:
                item = client_queue.get()
                # print(f"Get item: {item[0]}")
                if item[0] == 'image':
                    # TODO: analyse the image and send text to the server
                    image = None
                if item[0] == 'audio':
                    audio = item[1]
                    print("get audio data.")
                    emo_display_queue.put(('emo_data', 'happy'))
                    '''
                    # send the data to the server
                    response = client.send_data_to_server(
                        text=None, audio_data=audio, image_data=None, chat_status=chat_status)
                    print("get response from server.")
                    chat_status = 'chatting'
                    print(f"response: {response}")

                    audio_play_queue.put(('audio', response))
                    '''
                image = None; audio = None
    def audio_play_process(self, logger, audio_play_queue):
        from takway.audio_utils import AudioPlayer
        audio_player = AudioPlayer()
        self.logger.info("Audio play process started.")
        while True:
            if audio_play_queue.empty():
                time.sleep(0.2)
            else:
                item = audio_play_queue.get()
                if item[0] == 'server_data':
                    # play the audio returned by the server
                    print("Playing audio...")
                    server_data = item[1]
                    audio_player.play(server_data['audio_base64'], audio_type='base64')

    def emo_display_process(self, logger, emo_display_queue):
        from takway.emo_utils import EmoVideoPlayer
        emo_player = EmoVideoPlayer(**self.emo_args)
        self.logger.info("Emo display process started.")
        # logger.info("Emo display process started.")
        # print("Emo display process started.")
        while True:
            if emo_display_queue.empty():
                time.sleep(0.2)
                seed = random.randrange(0, 1000)
                print(f"seed: {seed}")
                if seed < 100:
                    # emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink')
                    emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink')
            else:
                item = emo_display_queue.get()
                print(f"Emo display process Get item: {item[0]}")
                if item[0] == 'emo_data':
                    server_data = item[1]
                    print("Displaying emo...")

                    # emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink')
                    # emo_player.display_emo_opencv(emo_name='静态', stage='quick_wink')
                    emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink')
                    emo_player.display_emo_maixsense(emo_name='静态', stage='quick_wink')


'''
def display_process(q):
    print("Display process started.")
    while True:
        item = q.get()
        if item[0] == 'server_data':
            server_data = item[1]
            # display image and text
            # print("Displaying image and text:", item[1]['image'], item[1]['text'])
            print("Displaying image and text:")
            # the actual image/text display code would go here
        if item[0] == 'image':
            # display image and text
            cv2.imshow('image', item[1])
            cv2.waitKey(1)
'''
if __name__ == '__main__':

    try:
        import gpiod as gpio
        model_path = "vosk-model-small-cn-0.22"
        emo_dir = "ResizedEmoji"
    except:
        model_path = r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22"
        emo_dir = r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji"

    import argparse
    parser = argparse.ArgumentParser()
    # server parameters
    parser.add_argument('--server_url', type=str, default='http://127.0.0.1:5000/process_all', help='Server url')
    # audio parameters
    parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
    parser.add_argument('--record_CHUNK_SIZE', type=int, default=8000, help='Record chunk size')
    parser.add_argument('--keywords', nargs='+', default=['你好'], help='Voice trigger keywords')
    # recorder parameters
    parser.add_argument('--hd_trigger', type=str, default='keyboard', help='Hardware trigger')
    parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')
    parser.add_argument('--CHUNK', type=int, default=2048, help='Record chunk size')
    parser.add_argument('--RATE', type=int, default=8000, help='Audio rate')
    parser.add_argument('--FORMAT', type=int, default=16, help='Audio format')
    parser.add_argument('--CHANNELS', type=int, default=1, help='Audio channels')
    parser.add_argument('--filename', type=str, default=None, help='Audio file name')
    # ASR parameters
    # model_path="vosk-model-small-cn-0.22"
    # model_path=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22"
    parser.add_argument('--model_path', type=str, default=model_path, help='Vosk model path')
    # video parameters
    parser.add_argument('--device', type=str, default='pc', help='Video device')
    parser.add_argument('--width', type=int, default=1280, help='Video width')
    parser.add_argument('--height', type=int, default=720, help='Video height')
    # emo parameters
    # emo_dir="ResizedEmoji"
    # emo_dir=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji"
    parser.add_argument('--emo_dir', type=str, default=emo_dir, help='Emo dir')
    # log parameters
    parser.add_argument('--log_file', type=str, default='my.log', help='Log file')
    parser.add_argument('--log_level', type=str, default='INFO', help='Log level')

    parser.add_argument('--debug', type=bool, default=True, help='Debug mode')
    args = parser.parse_args()

    # sort out args and params
    server_args = {
        'server_url': args.server_url,
    }

    audio_args = {
        'voice_trigger': args.voice_trigger,
        'keywords': args.keywords,
        'record_CHUNK_SIZE': args.record_CHUNK_SIZE,
    }

    recorder_args = {
        'hd_trigger': args.hd_trigger,
        'keyboard_key': args.keyboard_key,
        'model_path': args.model_path,
        'CHUNK': args.CHUNK,
        'FORMAT': pyaudio.paInt16 if args.FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.CHANNELS,
        'RATE': args.RATE,
        'filename': args.filename,
    }

    asr_args = {
        'model_path': args.model_path,
        'RATE': args.RATE,
        'debug': args.debug,
    }

    video_args = {
        'device': args.device,
        'width': args.width,
        'height': args.height,
    }

    emo_args = {
        'emo_dir': args.emo_dir,
    }

    log_args = {
        'log_file': args.log_file,
        'log_level': args.log_level,
    }

    web_request_mp_manager = WebRequestMPManager(
        server_args=server_args,
        audio_args=audio_args,
        recorder_args=recorder_args,
        asr_args=asr_args,
        video_args=video_args,
        emo_args=emo_args,
        log_args=log_args)
    web_request_mp_manager.process_init()
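# Quick arithmetic on the recorder defaults above (a sanity-check sketch): one
# CHUNK of paInt16 mono audio is
#
#   CHUNK, RATE = 2048, 8000
#   seconds_per_read = CHUNK / RATE      # 0.256 s of audio per read
#   bytes_per_read = CHUNK * 1 * 2       # channels * 2 bytes per 16-bit sample = 4096 B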
@@ -0,0 +1,187 @@
from takway.clients.web_socket_client_utils import WebSocketClinet
import pvporcupine
import pyaudio
import platform


if __name__ == '__main__':

    # server_url = 'ws://121.41.224.27:8000/chat'
    # server_url = 'ws://39.107.254.69:33089/chat'
    # server_url = 'wss://takway-ai.kingtous.cn/chat/streaming'
    # server_url = 'ws://114.214.236.207:7878/chat/streaming'
    server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary'

    # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
    # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def'  # 怼怼鸭 (duck)
    # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66'  # 小睡仙 v2 (sleep fairy)
    session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371'  # new

    excute_args = {}
    # excute_args = {'enable': True}

    ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='

    system = platform.system()
    if system == 'Windows':
        print("WebSocketClinet runs on Windows system.")
        board = None
    elif system == 'Linux':
        # board = 'v329'
        board = 'orangepi'
        ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw=='

    microphone_device = None
    speaker_device = None

    if board == 'v329':
        import gpiod as gpio

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        # overridden below: use the custom Chinese wake word instead
        keywords = ['可莉可莉']
        keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"]
        model_path = r"picovoice_models/porcupine_params_zh.pv"

        hd_trigger = 'button'
        player = 'maixsense'
    elif board == 'orangepi':

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        hd_trigger = 'button'

        microphone_device = 2
        speaker_device = 2

    else:

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        hd_trigger = 'keyboard'
        player = 'opencv'


    import argparse
    parser = argparse.ArgumentParser()
    # server params

    # recorder parameters
    parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
    parser.add_argument('--access_key', default=ACCESS_KEY,
                        help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
    parser.add_argument('--keywords', nargs='+', choices=sorted(pvporcupine.KEYWORDS),
                        default=keywords,
                        help='List of default keywords for detection. Available keywords: %s' % ', '.join(
                            '%s' % w for w in sorted(pvporcupine.KEYWORDS)), metavar='')
    parser.add_argument('--keyword_paths', nargs='+',
                        default=keyword_paths,
                        help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
    parser.add_argument('--library_path', default=None,
                        help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
    parser.add_argument('--model_path',
                        default=model_path,
                        help='Absolute path to the file containing model parameters. '
                             'Default: using the library provided by `pvporcupine`')
    parser.add_argument('--sensitivities', type=float,
                        default=0.9,
                        help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
                             "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.9 "
                             "will be used.")
    parser.add_argument('--hd_trigger', type=str,
                        # default='keyboard',
                        default=hd_trigger,
                        help='Hardware trigger')
    parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')

    # press type
    parser.add_argument('--press_type', type=str, default='long', choices=['long', 'single'], help='Press type')

    # recorder parameters
    parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size')  # original value
    parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate')
    parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format')
    parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels')
    parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name')
    parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
    parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec')
    parser.add_argument('--max_slience_time', type=int, default=30, help='Max silence time when recording, sec')
    parser.add_argument('--microphone_device', type=int, default=microphone_device, help='Microphone device index')

    # player parameters
    parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index')
    parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size')
    parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate')
    parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format')
    parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels')
    parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name')
    parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer')

    # log parameters
    parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file')
    parser.add_argument('--log_level', type=str, default='INFO', help='Log level')

    parser.add_argument('--debug', type=bool, default=False, help='Debug mode')
    args = parser.parse_args()

    # sort out args and params
    server_args = {
        'server_url': server_url,
        'session_id': session_id,
    }

    recorder_args = {
        'board': board,
        'access_key': args.access_key,
        'keywords': args.keywords,
        'keyword_paths': args.keyword_paths,
        'library_path': args.library_path,
        'model_path': args.model_path,
        'sensitivities': args.sensitivities,
        'hd_trigger': args.hd_trigger,
        'keyboard_key': args.keyboard_key,
        'press_type': args.press_type,
        'voice_trigger': args.voice_trigger,
        'CHUNK': args.IN_CHUNK,
        'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.IN_CHANNELS,
        'RATE': args.IN_RATE,
        'max_slience_time': args.max_slience_time,
        'min_stream_record_time': args.min_stream_record_time,
        'input_device_index': args.microphone_device,
        'frames_per_buffer': args.IN_frames_per_buffer,
    }

    player_args = {
        'output_device_index': args.speaker_device,
        'CHUNK': args.OUT_CHUNK,
        'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.OUT_CHANNELS,
        'RATE': args.OUT_RATE,
        'filename': args.OUT_filename,
        'frames_per_buffer': args.OUT_frames_per_buffer,
    }

    log_args = {
        'log_file': args.log_file,
        'log_level': args.log_level,
    }

    localclient = WebSocketClinet(
        board=board,
        server_args=server_args,
        recorder_args=recorder_args,
        player_args=player_args,
        excute_args=excute_args,
        log_args=log_args)
    localclient.process_init()
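# Quick arithmetic on the streaming defaults above (a sanity-check sketch):
#
#   in_chunk_s = 3840 / 16000     # 0.24 s of microphone audio per read
#   out_chunk_s = 1024 / 22050    # ~0.046 s of TTS audio per playback write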
@@ -0,0 +1,181 @@
from takway.clients.web_socket_client_utils import WebSocketClinet
import pvporcupine
import pyaudio
import platform


if __name__ == '__main__':

    # server_url = 'ws://121.41.224.27:8000/chat'
    # server_url = 'ws://39.107.254.69:33089/chat'
    # server_url = 'wss://takway-ai.kingtous.cn/chat/streaming'
    # server_url = 'ws://114.214.236.207:7878/chat/streaming'
    server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary'

    # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed'
    # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def'  # 怼怼鸭 (duck)
    # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66'  # 小睡仙 v2 (sleep fairy)
    session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371'  # new

    excute_args = {}
    # excute_args = {'enable': True}

    ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA=='

    system = platform.system()
    if system == 'Windows':
        print("WebSocketClinet runs on Windows system.")
        board = None
    elif system == 'Linux':
        # board = 'v329'
        board = 'orangepi'
        ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw=='

    microphone_device = None
    speaker_device = None

    if board == 'v329':
        import gpiod as gpio

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        # overridden below: use the custom Chinese wake word instead
        keywords = ['可莉可莉']
        keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"]
        model_path = r"picovoice_models/porcupine_params_zh.pv"

        hd_trigger = 'button'
        player = 'maixsense'
    elif board == 'orangepi':

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        hd_trigger = 'button'

        microphone_device = 2
        speaker_device = 2

    else:

        keywords = ['hey google', 'ok google']
        keyword_paths = None
        model_path = None

        hd_trigger = 'keyboard'
        player = 'opencv'


    import argparse
    parser = argparse.ArgumentParser()
    # server params

    # recorder parameters
    parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger')
    parser.add_argument('--access_key', default=ACCESS_KEY,
                        help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)')
    parser.add_argument('--keywords', nargs='+', choices=sorted(pvporcupine.KEYWORDS),
                        default=keywords,
                        help='List of default keywords for detection. Available keywords: %s' % ', '.join(
                            '%s' % w for w in sorted(pvporcupine.KEYWORDS)), metavar='')
    parser.add_argument('--keyword_paths', nargs='+',
                        default=keyword_paths,
                        help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument")
    parser.add_argument('--library_path', default=None,
                        help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`')
    parser.add_argument('--model_path',
                        default=model_path,
                        help='Absolute path to the file containing model parameters. '
                             'Default: using the library provided by `pvporcupine`')
    parser.add_argument('--sensitivities', type=float,
                        default=0.9,
                        help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher "
                             "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.9 "
                             "will be used.")
    parser.add_argument('--hd_trigger', type=str,
                        # default='keyboard',
                        default=hd_trigger,
                        help='Hardware trigger')
    parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key')

    # recorder parameters
    parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size')  # original value
    parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate')
    parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format')
    parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels')
    parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name')
    parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer')
    parser.add_argument('--min_stream_record_time', type=float, default=0.8, help='Min stream record time, sec')
    parser.add_argument('--microphone_device', type=int, default=microphone_device, help='Microphone device index')

    # player parameters
    parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index')
    parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size')
    parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate')
    parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format')
    parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels')
    parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name')
    parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer')

    # log parameters
    parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file')
    parser.add_argument('--log_level', type=str, default='INFO', help='Log level')

    parser.add_argument('--debug', type=bool, default=False, help='Debug mode')
    args = parser.parse_args()

    # sort out args and params
    server_args = {
        'server_url': server_url,
        'session_id': session_id,
    }

    recorder_args = {
        'board': board,
        'access_key': args.access_key,
        'keywords': args.keywords,
        'keyword_paths': args.keyword_paths,
        'library_path': args.library_path,
        'model_path': args.model_path,
        'sensitivities': args.sensitivities,
        'hd_trigger': args.hd_trigger,
        'keyboard_key': args.keyboard_key,
        'voice_trigger': args.voice_trigger,
        'CHUNK': args.IN_CHUNK,
        'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.IN_CHANNELS,
        'RATE': args.IN_RATE,
        'min_stream_record_time': args.min_stream_record_time,
        'input_device_index': args.microphone_device,
        'frames_per_buffer': args.IN_frames_per_buffer,
    }

    player_args = {
        'output_device_index': args.speaker_device,
        'CHUNK': args.OUT_CHUNK,
        'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32,
        'CHANNELS': args.OUT_CHANNELS,
        'RATE': args.OUT_RATE,
        'filename': args.OUT_filename,
        'frames_per_buffer': args.OUT_frames_per_buffer,
    }

    log_args = {
        'log_file': args.log_file,
        'log_level': args.log_level,
    }

    localclient = WebSocketClinet(
        board=board,
        server_args=server_args,
        recorder_args=recorder_args,
        player_args=player_args,
        excute_args=excute_args,
        log_args=log_args)
    localclient.process_init()