commit 8255cc8c6861f318cb64359dc84aec6048b10b82 Author: IrvingGao <1729854488@qq.com> Date: Thu May 23 01:27:51 2024 +0800 update diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..f2d1a09 --- /dev/null +++ b/.gitignore @@ -0,0 +1,401 @@ +## Ignore Visual Studio temporary files, build results, and +## files generated by popular Visual Studio add-ons. +## +## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore + +# User-specific files +*.rsuser +*.suo +*.user +*.userosscache +*.sln.docstates + +# User-specific files (MonoDevelop/Xamarin Studio) +*.userprefs + +# Mono auto generated files +mono_crash.* + +# Build results +[Dd]ebug/ +[Dd]ebugPublic/ +[Rr]elease/ +[Rr]eleases/ +x64/ +x86/ +[Ww][Ii][Nn]32/ +[Aa][Rr][Mm]/ +[Aa][Rr][Mm]64/ +bld/ +[Bb]in/ +[Oo]bj/ +[Oo]ut/ +[Ll]og/ +[Ll]ogs/ + +# Visual Studio 2015/2017 cache/options directory +.vs/ +# Uncomment if you have tasks that create the project's static files in wwwroot +#wwwroot/ + +# Visual Studio 2017 auto generated files +Generated\ Files/ + +# MSTest test Results +[Tt]est[Rr]esult*/ +[Bb]uild[Ll]og.* + +# NUnit +*.VisualState.xml +TestResult.xml +nunit-*.xml + +# Build Results of an ATL Project +[Dd]ebugPS/ +[Rr]eleasePS/ +dlldata.c + +# Benchmark Results +BenchmarkDotNet.Artifacts/ + +# .NET Core +project.lock.json +project.fragment.lock.json +artifacts/ + +# ASP.NET Scaffolding +ScaffoldingReadMe.txt + +# StyleCop +StyleCopReport.xml + +# Files built by Visual Studio +*_i.c +*_p.c +*_h.h +*.ilk +*.meta +*.obj +*.iobj +*.pch +*.pdb +*.ipdb +*.pgc +*.pgd +*.rsp +*.sbr +*.tlb +*.tli +*.tlh +*.tmp +*.tmp_proj +*_wpftmp.csproj +*.log +*.vspscc +*.vssscc +.builds +*.pidb +*.svclog +*.scc + +# Chutzpah Test files +_Chutzpah* + +# Visual C++ cache files +ipch/ +*.aps +*.ncb +*.opendb +*.opensdf +*.sdf +*.cachefile +*.VC.db +*.VC.VC.opendb + +# Visual Studio profiler +*.psess +*.vsp +*.vspx +*.sap + +# Visual Studio Trace Files +*.e2e + +# TFS 2012 Local Workspace +$tf/ + +# Guidance Automation Toolkit +*.gpState + +# ReSharper is a .NET coding add-in +_ReSharper*/ +*.[Rr]e[Ss]harper +*.DotSettings.user + +# TeamCity is a build add-in +_TeamCity* + +# DotCover is a Code Coverage Tool +*.dotCover + +# AxoCover is a Code Coverage Tool +.axoCover/* +!.axoCover/settings.json + +# Coverlet is a free, cross platform Code Coverage Tool +coverage*.json +coverage*.xml +coverage*.info + +# Visual Studio code coverage results +*.coverage +*.coveragexml + +# NCrunch +_NCrunch_* +.*crunch*.local.xml +nCrunchTemp_* + +# MightyMoose +*.mm.* +AutoTest.Net/ + +# Web workbench (sass) +.sass-cache/ + +# Installshield output folder +[Ee]xpress/ + +# DocProject is a documentation generator add-in +DocProject/buildhelp/ +DocProject/Help/*.HxT +DocProject/Help/*.HxC +DocProject/Help/*.hhc +DocProject/Help/*.hhk +DocProject/Help/*.hhp +DocProject/Help/Html2 +DocProject/Help/html + +# Click-Once directory +publish/ + +# Publish Web Output +*.[Pp]ublish.xml +*.azurePubxml +# Note: Comment the next line if you want to checkin your web deploy settings, +# but database connection strings (with potential passwords) will be unencrypted +*.pubxml +*.publishproj + +# Microsoft Azure Web App publish settings. 
Comment the next line if you want to +# checkin your Azure Web App publish settings, but sensitive information contained +# in these scripts will be unencrypted +PublishScripts/ + +# NuGet Packages +*.nupkg +# NuGet Symbol Packages +*.snupkg +# The packages folder can be ignored because of Package Restore +**/[Pp]ackages/* +# except build/, which is used as an MSBuild target. +!**/[Pp]ackages/build/ +# Uncomment if necessary however generally it will be regenerated when needed +#!**/[Pp]ackages/repositories.config +# NuGet v3's project.json files produces more ignorable files +*.nuget.props +*.nuget.targets + +# Microsoft Azure Build Output +csx/ +*.build.csdef + +# Microsoft Azure Emulator +ecf/ +rcf/ + +# Windows Store app package directories and files +AppPackages/ +BundleArtifacts/ +Package.StoreAssociation.xml +_pkginfo.txt +*.appx +*.appxbundle +*.appxupload + +# Visual Studio cache files +# files ending in .cache can be ignored +*.[Cc]ache +# but keep track of directories ending in .cache +!?*.[Cc]ache/ + +# Others +ClientBin/ +~$* +*~ +*.dbmdl +*.dbproj.schemaview +*.jfm +*.pfx +*.publishsettings +orleans.codegen.cs + +# Including strong name files can present a security risk +# (https://github.com/github/gitignore/pull/2483#issue-259490424) +#*.snk + +# Since there are multiple workflows, uncomment next line to ignore bower_components +# (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) +#bower_components/ + +# RIA/Silverlight projects +Generated_Code/ + +# Backup & report files from converting an old project file +# to a newer Visual Studio version. Backup files are not needed, +# because we have git ;-) +_UpgradeReport_Files/ +Backup*/ +UpgradeLog*.XML +UpgradeLog*.htm +ServiceFabricBackup/ +*.rptproj.bak + +# SQL Server files +*.mdf +*.ldf +*.ndf + +# Business Intelligence projects +*.rdl.data +*.bim.layout +*.bim_*.settings +*.rptproj.rsuser +*- [Bb]ackup.rdl +*- [Bb]ackup ([0-9]).rdl +*- [Bb]ackup ([0-9][0-9]).rdl + +# Microsoft Fakes +FakesAssemblies/ + +# GhostDoc plugin setting file +*.GhostDoc.xml + +# Node.js Tools for Visual Studio +.ntvs_analysis.dat +node_modules/ + +# Visual Studio 6 build log +*.plg + +# Visual Studio 6 workspace options file +*.opt + +# Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 
+*.vbw + +# Visual Studio LightSwitch build output +**/*.HTMLClient/GeneratedArtifacts +**/*.DesktopClient/GeneratedArtifacts +**/*.DesktopClient/ModelManifest.xml +**/*.Server/GeneratedArtifacts +**/*.Server/ModelManifest.xml +_Pvt_Extensions + +# Paket dependency manager +.paket/paket.exe +paket-files/ + +# FAKE - F# Make +.fake/ + +# CodeRush personal settings +.cr/personal + +# Python Tools for Visual Studio (PTVS) +__pycache__/ + + +# Cake - Uncomment if you are using it +# tools/** +# !tools/packages.config + +# Tabs Studio +*.tss + +# Telerik's JustMock configuration file +*.jmconfig + +# BizTalk build output +*.btp.cs +*.btm.cs +*.odx.cs +*.xsd.cs + +# OpenCover UI analysis results +OpenCover/ + +# Azure Stream Analytics local run output +ASALocalRun/ + +# MSBuild Binary and Structured Log +*.binlog + +# NVidia Nsight GPU debugger configuration file +*.nvuser + +# MFractors (Xamarin productivity tool) working folder +.mfractor/ + +# Local History for Visual Studio +.localhistory/ + +# BeatPulse healthcheck temp database +healthchecksdb + +# Backup folder for Package Reference Convert tool in Visual Studio 2017 +MigrationBackup/ + +# Ionide (cross platform F# VS Code tools) working folder +.ionide/ + +# Fody - auto-generated XML schema +FodyWeavers.xsd + +# build +build +monotonic_align/core.c +*.o +*.so +*.dll + +# data +/config.json +/*.pth +*.wav +*.mp3 +/resources +/MoeGoe.spec +/dist/MoeGoe +/dist +Gao/ +.idea + + +# custom +vits_model +model/*.pth +vosk-model-* +whisper* +temp +*.whl +*demo* +Emoji/ +api-for-open-llm/models +datasets/ +ChatHaruhi-Expand-118K/ + +!examples/example_recording.wav + +music/ +story \ No newline at end of file diff --git a/README.md b/README.md new file mode 100644 index 0000000..a2f11eb --- /dev/null +++ b/README.md @@ -0,0 +1,48 @@ + +## 客户端 `Client`前端 + +### 硬件交互前端服务 + +#### 介绍 + +硬件交互前端服务是基于`WebSocket`协议的服务,用于与硬件设备进行通信。 + +### 环境准备 + +#### (1) 安装依赖项: + +``` +sudo apt-get update +sudo apt-get upgrade +sudo apt-get install cmake g++ gcc +sudo apt-get install python3-pip python3-dev portaudio19-dev libsndfile1 +``` + +#### (2) 克隆项目到本地并安装依赖: + +``` +// 克隆项目到本地 https or ssh +git clone https://github.com/Irvingao/takway_base.git or git clone git@github.com:Irvingao/takway_base.git +cd takway_base +pip install -v -e . +``` + +#### (3) 安装板载端环境依赖: + +``` +pip install -r requirements/board_requirements.txt +``` + +#### (4) 下载相关模型文件: + +- [Picovoice:](https://picovoice.ai/docs/quick-start/porcupine-python/)边缘端关键词唤醒模型 + +``` +目前使用的是`可莉可莉_zh_raspberry-pi_v3_0_0`版本,可以点击网站自行设置替换关键词模型。 +``` + +#### (5) 启动服务: + +``` +python ws_client.py +``` diff --git a/board_env_setup.sh b/board_env_setup.sh new file mode 100644 index 0000000..76efcab --- /dev/null +++ b/board_env_setup.sh @@ -0,0 +1,24 @@ +#!/bin/bash + +# 给bashrc添加自启动python脚本 +# echo "nohup python3 /home/orangepi/takway_base/ws_client.py& " >> ~/.bashrc + +# update system +sudo -S apt-get update -y << EOF +orangepi +EOF + +# install dependencies +sudo apt-get install -y cmake g++ gcc +sudo apt-get install -y python3-pip python3-dev portaudio19-dev libsndfile1 + +# install python dependencies +# git clone https://Irvingao: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@github.com/Irvingao/takway_base.git +git clone http://43.132.157.186:3000/gaohz/TakwayBoard.git +cd TakwayBoard +# git remote set-url origin https://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@github.com/Irvingao/takway_base.git +pip install -v -e . 
+ +pip install -r requirements.txt + +reboot \ No newline at end of file diff --git a/requirements.txt b/requirements.txt new file mode 100644 index 0000000..247619b --- /dev/null +++ b/requirements.txt @@ -0,0 +1,8 @@ +numpy +Unidecode +pyaudio +keyboard +websocket-client +pvporcupine +requests +webrtcvad \ No newline at end of file diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..ce859b1 --- /dev/null +++ b/setup.py @@ -0,0 +1,15 @@ +# setup.py + +from setuptools import setup, find_packages + +setup( + name='takway', + version='1.0', + packages=find_packages(), # 自动发现包和子包 + url='https://github.com/Irvingao/takway_base', + classifiers=[ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: MIT License', + 'Operating System :: OS Independent', + ], +) diff --git a/takway.egg-info/PKG-INFO b/takway.egg-info/PKG-INFO new file mode 100644 index 0000000..9890984 --- /dev/null +++ b/takway.egg-info/PKG-INFO @@ -0,0 +1,13 @@ +Metadata-Version: 1.1 +Name: takway +Version: 1.0 +Summary: UNKNOWN +Home-page: https://github.com/Irvingao/takway_base +Author: UNKNOWN +Author-email: UNKNOWN +License: UNKNOWN +Description: UNKNOWN +Platform: UNKNOWN +Classifier: Programming Language :: Python :: 3 +Classifier: License :: OSI Approved :: MIT License +Classifier: Operating System :: OS Independent diff --git a/takway.egg-info/SOURCES.txt b/takway.egg-info/SOURCES.txt new file mode 100644 index 0000000..96a5b5e --- /dev/null +++ b/takway.egg-info/SOURCES.txt @@ -0,0 +1,21 @@ +README.md +setup.py +takway/__init__.py +takway/audio_utils.py +takway/cam_utils.py +takway/common_utils.py +takway/emo_utils.py +takway/picovoice_utils.py +takway.egg-info/PKG-INFO +takway.egg-info/SOURCES.txt +takway.egg-info/dependency_links.txt +takway.egg-info/top_level.txt +takway/board/__init__.py +takway/board/base_hd.py +takway/board/keyboard.py +takway/board/orangepi.py +takway/board/sipeed.py +takway/clients/__init__.py +takway/clients/client_utils.py +takway/clients/local_client_utils.py +takway/clients/web_socket_client_utils.py \ No newline at end of file diff --git a/takway.egg-info/dependency_links.txt b/takway.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/takway.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/takway.egg-info/top_level.txt b/takway.egg-info/top_level.txt new file mode 100644 index 0000000..9ed344f --- /dev/null +++ b/takway.egg-info/top_level.txt @@ -0,0 +1 @@ +takway diff --git a/takway/__init__.py b/takway/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/takway/apps/data_struct.py b/takway/apps/data_struct.py new file mode 100644 index 0000000..3f859fa --- /dev/null +++ b/takway/apps/data_struct.py @@ -0,0 +1,133 @@ +import multiprocessing +import queue +from typing import Iterator +import time +import json +import requests + +class QueueIterator: + def __init__(self, + queue, + timeout: int = 10): + self.queue = queue + self.timeout = timeout + + def __iter__(self) -> Iterator: + return self + + def __next__(self): + try: + data = self.queue.get(block=True, timeout=self.timeout) + if data is None: # 使用None作为结束信号 + # print("QueueIterator: End of data") + raise StopIteration + else: + # print("QueueIterator: Get data") + return data + except queue.Empty: + # print("QueueIterator: Queue is empty") + raise StopIteration + + + + +def producer(queue: multiprocessing.Queue): + for i in range(5): # 假设生产5个数据项 + data = {'data': i, 'is_end': False} + queue.put(data) + time.sleep(1) + 
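+    # A None item is the end-of-stream sentinel: QueueIterator raises StopIteration
+    # when it dequeues one, so consumer loops below terminate cleanly.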
queue.put(None) # 发送结束信号 + +def get_stream_data_from_queue(queue: multiprocessing.Queue): + for data in QueueIterator(queue): + print(data) + yield json.dumps({'line': data, 'is_end': False}) + # 模拟数据处理时间 + +if __name__ == "__main__": + queue = multiprocessing.Queue() + # 创建并启动生产者进程 + p = multiprocessing.Process(target=producer, args=(queue,)) + p.start() + + # 使用迭代器来消费Queue中的数据 + for data in QueueIterator(queue): + print(data) + + # 等待生产者进程结束 + p.join() + + +''' +# request body +{ + "AUTH_INFO": { + "user": "", # string + "authid": "", # string + "api_keys": "", # string + "timestamp": "", # string + }, + "DATA": { + "Audio": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "rate": ; # int + "channels": ; # int + "format": ; # int + } + }, + "Text": { + "data": "xxxxx", # base64 encoded data + "metadata": { + # TODO + } + }, + "Image": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "width": ; # int + "height": ; # int + "format": ; # string + } + } + } + "META_INFO": { + "model_type": "", # string + "model_version": "", # string + "model_url": "", # string + "vits": { + "speaker_id": ; # int + } +} + +# response body +{ + RESPONSE_INFO: { + "status": "success/error", # string + "message": "xxxxx", # string + } + "DATA": { + "Audio": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "rate": ; # int + "channels": ; # int + "format": ; # int + } + }, + "Text": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "is_end": True/False, # bool + } + } + "Image": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "width": ; # int + "height": ; # int + "format": ; # string + } + } +} +''' \ No newline at end of file diff --git a/takway/audio_utils.py b/takway/audio_utils.py new file mode 100644 index 0000000..e15f832 --- /dev/null +++ b/takway/audio_utils.py @@ -0,0 +1,578 @@ +import io +import os +import time +import pyaudio +import wave +import json +import warnings +import threading +import numpy as np +from collections import deque + +from .common_utils import encode_bytes2str, decode_str2bytes + +from takway.board import * +try: + import keyboard +except: + pass + +def play_audio(audio_data, type='base64'): + ''' + 读取base64编码的音频流并播放 + ''' + # PyAudio配置 + p = pyaudio.PyAudio() + stream = p.open(format=pyaudio.paInt16, channels=1, rate=22050, output=True) + + # 播放音频 + stream.write(audio_data) + stream.stop_stream() + stream.close() + p.terminate() + +''' +import librosa +def reshape_sample_rate(audio, sr_original=None, sr_target=16000): + # 获取原始采样率和音频数据 + if isinstance(audio, tuple): + sr_original, audio_data = audio + elif isinstance(audio, bytes): + audio_data = np.frombuffer(audio, dtype=np.int16) + assert sr_original is not None, f"sr_original should be provided if audio is a \ + numpy.ndarray, but got sr_original `{sr_original}`." + + if isinstance(audio_data, np.ndarray): + if audio_data.dtype == np.dtype('int16'): + audio_data = audio_data.astype(np.float32) / np.iinfo(np.int16).max + assert audio_data.dtype == np.dtype('float32'), f"audio_data should be float32, \ + but got {audio_data.dtype}." 
+ else: + raise TypeError(f"audio_data should be numpy.ndarray, but got {type(audio_data)}.") + + # 重新采样音频数据 + audio_data_resampled = librosa.resample(audio_data, orig_sr=sr_original, target_sr=sr_target) + + if audio_data_resampled.dtype == np.dtype('float32'): + audio_data_resampled = np.int16(audio_data_resampled * np.iinfo(np.int16).max) + + # If the input was bytes, return the resampled data as bytes + if isinstance(audio, bytes): + audio_data_resampled = audio_data_resampled.tobytes() + + return audio_data_resampled + +# Example usage: +# If your audio data is in bytes: +# audio_bytes = b'...' # Your audio data as bytes +# audio_data_resampled = reshape_sample_rate(audio_bytes) + +# If your audio data is in numpy int16: +# audio_int16 = np.array([...], dtype=np.int16) # Your audio data as numpy int16 +# audio_data_resampled = reshape_sample_rate(audio_int16) +''' + + + +# ####################################################### # +# base audio class +# ####################################################### # + +class BaseAudio: + def __init__(self, + filename=None, + input=False, + output=False, + CHUNK=1024, + FORMAT=pyaudio.paInt16, + CHANNELS=1, + RATE=16000, + input_device_index=None, + output_device_index=None, + **kwargs): + self.CHUNK = CHUNK + self.FORMAT = FORMAT + self.CHANNELS = CHANNELS + self.RATE = RATE + self.filename = filename + assert input!= output, "input and output cannot be the same, \ + but got input={} and output={}.".format(input, output) + print("------------------------------------------") + print(f"{'Input' if input else 'Output'} Audio Initialization: ") + print(f"CHUNK: {self.CHUNK} \nFORMAT: {self.FORMAT} \nCHANNELS: {self.CHANNELS} \nRATE: {self.RATE} \ninput_device_index: {input_device_index} \noutput_device_index: {output_device_index}") + print("------------------------------------------") + self.p = pyaudio.PyAudio() + self.stream = self.p.open(format=FORMAT, + channels=CHANNELS, + rate=RATE, + input=input, + output=output, + input_device_index=input_device_index, + output_device_index=output_device_index, + **kwargs) + + def load_audio_file(self, wav_file): + with wave.open(wav_file, 'rb') as wf: + params = wf.getparams() + frames = wf.readframes(params.nframes) + print("Audio file loaded.") + # Audio Parameters + # print("Channels:", params.nchannels) + # print("Sample width:", params.sampwidth) + # print("Frame rate:", params.framerate) + # print("Number of frames:", params.nframes) + # print("Compression type:", params.comptype) + return frames + + def check_audio_type(self, audio_data, return_type=None): + assert return_type in ['bytes', 'io', None], \ + "return_type should be 'bytes', 'io' or None." + if isinstance(audio_data, str): + if len(audio_data) > 50: + audio_data = decode_str2bytes(audio_data) + else: + assert os.path.isfile(audio_data), \ + "audio_data should be a file path or a bytes object." 
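+                # strings longer than 50 chars were treated as base64-encoded audio above;
+                # shorter strings must be a path to a WAV file, whose frames are read here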
+ wf = wave.open(audio_data, 'rb') + audio_data = wf.readframes(wf.getnframes()) + elif isinstance(audio_data, np.ndarray): + if audio_data.dtype == np.dtype('float32'): + audio_data = np.int16(audio_data * np.iinfo(np.int16).max) + audio_data = audio_data.tobytes() + elif isinstance(audio_data, bytes): + pass + else: + raise TypeError(f"audio_data must be bytes, numpy.ndarray or str, \ + but got {type(audio_data)}") + + if return_type == None: + return audio_data + return self.write_wave(None, [audio_data], return_type) + + def write_wave(self, filename, frames, return_type='io'): + """Write audio data to a file.""" + if isinstance(frames, bytes): + frames = [frames] + if not isinstance(frames, list): + raise TypeError("frames should be \ + a list of bytes or a bytes object, \ + but got {}.".format(type(frames))) + + if return_type == 'io': + if filename is None: + filename = io.BytesIO() + if self.filename: + filename = self.filename + return self.write_wave_io(filename, frames) + elif return_type == 'bytes': + return self.write_wave_bytes(frames) + + + def write_wave_io(self, filename, frames): + """ + Write audio data to a file-like object. + + Args: + filename: [string or file-like object], file path or file-like object to write + frames: list of bytes, audio data to write + """ + wf = wave.open(filename, 'wb') + + # 设置WAV文件的参数 + wf.setnchannels(self.CHANNELS) + wf.setsampwidth(self.p.get_sample_size(self.FORMAT)) + wf.setframerate(self.RATE) + wf.writeframes(b''.join(frames)) + wf.close() + if isinstance(filename, io.BytesIO): + filename.seek(0) # reset file pointer to beginning + return filename + + def write_wave_bytes(self, frames): + """Write audio data to a bytes object.""" + return b''.join(frames) + + +# ####################################################### # +# play audio data from Speaker +# ####################################################### # + +class AudioPlayer(BaseAudio): + def __init__(self, + RATE=22050, + **kwargs): + super().__init__(output=True, RATE=RATE, **kwargs) + + def play(self, audio_data): + # print("Playing audio data...") + audio_data = self.check_audio_type(audio_data, return_type=None) + + for i in range(0, len(audio_data), self.CHUNK): + self.stream.write(audio_data[i:i+self.CHUNK]) + # print("Playing audio data...{}/{}".format(i, len(audio_data))) + self.stream.write(audio_data[i+self.CHUNK:]) + # print("Audio data played.") + + + def close(self): + self.stream.stop_stream() + self.stream.close() + self.p.terminate() + +# ####################################################### # +# record audio data from microphone +# ####################################################### # +class BaseRecorder(BaseAudio): + def __init__(self, + input=True, + base_chunk_size=None, + RATE=16000, + **kwargs): + super().__init__(input=input, RATE=RATE, **kwargs) + self.base_chunk_size = base_chunk_size + if base_chunk_size is None: + self.base_chunk_size = self.CHUNK + + def record(self, + filename, + duration=5, + return_type='io', + logger=None): + if logger is not None: + logger.info("Recording started.") + else: + print("Recording started.") + frames = [] + for i in range(0, int(self.RATE / self.CHUNK * duration)): + data = self.stream.read(self.CHUNK, exception_on_overflow=False) + frames.append(data) + if logger is not None: + logger.info("Recording stopped.") + else: + print("Recording stopped.") + return self.write_wave(filename, frames, return_type) + + def record_chunk_voice(self, + return_type='bytes', + CHUNK=None, + exception_on_overflow=True, + 
queue=None): + data = self.stream.read(self.CHUNK if CHUNK is None else CHUNK, + exception_on_overflow=exception_on_overflow) + if return_type is not None: + return self.write_wave(None, [data], return_type) + return data + + +class HDRecorder(BaseRecorder): + def __init__(self, + board=None, + hd_trigger='keyboard', + keyboard_key='space', + voice_trigger=True, + hd_chunk_size=None, + hd_detect_threshold=50, + **kwargs): + super().__init__(**kwargs) + assert hd_trigger in ['keyboard', 'button'] + + self.hd_trigger = hd_trigger + self.voice_trigger = voice_trigger + + self.hd_chunk_size = hd_chunk_size + if hd_chunk_size is None: + self.hd_chunk_size = self.base_chunk_size + + if board == None: + assert hd_trigger == 'keyboard', "board should be `None` if hd_trigger is `keyboard`." + self.keyboard_key = keyboard_key + self.hardware = Keyboard(hd_trigger, keyboard_key, hd_detect_threshold) + else: + assert hd_trigger == 'button', f"hd_trigger should be `button` if board is `v329` or `orangepi`, but got `{hd_trigger}`." + if board == 'v329': + self.hardware = V329(hd_trigger, hd_detect_threshold) + elif board == 'orangepi': + self.hardware = OrangePi(hd_trigger, hd_detect_threshold) + print(f"Using {hd_trigger} as hardware trigger.") + + def wait_for_hardware_pressed(self): + return self.hardware.wait_for_hardware_pressed() + + @property + def is_hardware_pressed(self): + return self.hardware.is_hardware_pressed + + def record_hardware(self, return_type='bytes'): + """record audio when hardware trigger""" + print("Recording started for hardware trigger.") + frames = [] + self.wait_for_hardware_pressed() + while True: + if self.hd_trigger == 'keyboard': + if keyboard.is_pressed(self.keyboard_key): + print("recording...") + data = self.record_chunk_voice( + CHUNK=self.CHUNK, + return_type=None, + exception_on_overflow=False) + frames.append(data) + else: + break + print("Recording stopped.") + elif self.hd_trigger == 'button': + if self.get_button_status(): + data = self.stream.read(self.CHUNK) + frames.append(data) + else: + break + else: + recording = False + raise ValueError("hd_trigger should be 'keyboard' or 'button'.") + return self.write_wave(self.filename, frames, return_type) + + ''' + def record(self, return_type='bytes', queue=None): + if self.hd_trigger == 'all': + value_list = [] # 用于记录value的状态 + if keyboard.is_pressed(self.keyboard_key): + audio_data = self.record_keyboard(return_type, queue) + elif self.button.get_value() == 0: + if self.get_button_status(): + audio_data = self.record_button(return_type, queue) + else: + audio_data = self.record_voice(return_type, queue) + elif self.hd_trigger == 'keyboard': + print("Press SPACE to start recording.") + keyboard.wait("space") + audio_data = self.record_keyboard(return_type, queue) + elif self.hd_trigger == 'button': + print("Touch to start recording...") + if self.button.get_value() == 0: + if self.get_button_status(): + audio_data = self.record_button(return_type, queue) + else: + audio_data = self.record_voice(return_type, queue) + + return audio_data + + def record_keyboard(self, return_type='bytes', queue=None): + """record audio when keyboard pressing""" + print("Recording started.") + frames = [] + recording = True + while recording: + if keyboard.is_pressed(self.keyboard_key): + data = self.stream.read(self.CHUNK) + frames.append(data) + else: + recording = False + print("Recording stopped.") + return self.write_wave(self.filename, frames, return_type) + + def record_button(self, return_type='bytes', queue=None): + 
"""record audio when button pressing""" + print("Recording started.") + frames = [] + recording = True + while recording: + value = self.button.get_value() + if value == 0: + data = self.stream.read(CHUNK) + frames.append(data) + else: + recording = False + print("Recording stopped.") + return self.write_wave(self.filename, frames, return_type) + ''' + +# ####################################################### # +# record audio data from microphone with VAD +# ####################################################### # +try: + import webrtcvad + webrtcvad_available = True +except: + warnings.warn("webrtcvad module not found, please install it if use `vad` hd_trigger.") + webrtcvad_available = False + +class VADRecorder(HDRecorder): + def __init__(self, vad_sensitivity=0, frame_duration=30, vad_buffer_size=7, min_act_time=1,**kwargs): + super().__init__(**kwargs) + if webrtcvad_available: + self.vad = webrtcvad.Vad(vad_sensitivity) + self.vad_buffer_size = vad_buffer_size + self.vad_chunk_size = int(self.RATE * frame_duration / 1000) + + self.min_act_time = min_act_time # 最小活动时间,单位秒 + + self.is_currently_speaking = False + self.frames = [] + + def is_speech(self, data): + return self.vad.is_speech(data, self.RATE) + + def vad_filter(self, data): + pass + + + def vad_record(self, return_type='io', CHUNK=None, queue=None, save_file=False): + """录音并进行语音活动检测人声并返回分割后的音频数据""" + all_frames = [] + + buffer_size = self.vad_buffer_size + active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size) + audio_buffer = deque(maxlen=buffer_size) + silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size) + + print("vad_recorded_audio VAD started. Press Ctrl+C to stop.") + try: + while True: + data = self.stream.read(self.vad_chunk_size) + all_frames.append(data) + print(f"VAD processing..., is_speech: {self.is_speech(data)}") + if self.is_speech(data): + # 标志位buffer + active_buffer.append(True); active_buffer.popleft() + silence_buffer.append(False); silence_buffer.popleft() + # 暂时增加到buffer中 + audio_buffer.append(data) + # 如果满足检测要求 + if all(active_buffer): + if not self.is_currently_speaking: + print("Speech start detected") + self.is_currently_speaking = True + self.frames.extend(audio_buffer) # 把说话的buffer也加上 + if self.is_currently_speaking: + self.frames.append(data) + else: + # 标志位buffer + active_buffer.append(False); active_buffer.popleft() + silence_buffer.append(True); silence_buffer.popleft() + # 检测到人声并持续录音 + if self.is_currently_speaking: + # 结束标志位 + if all(silence_buffer): + print("Speech end detected") + break + except KeyboardInterrupt: + print("KeyboardInterrupt") + + finally: + print("Stopping...") + if len(all_frames) > 0: + print(f"ALL frame: {len(all_frames)}") + print(f"ASR frame: {len(self.frames)}") + if save_file: + self.write_wave(f"output_{time.time()}_all.wav", all_frames) + self.write_wave(f"output_{time.time()}.wav", self.frames) + return self.write_wave(None, self.frames, return_type='bytes') + + +# ####################################################### # +# record audio data from microphone with PicoVoice hot words detection +# ####################################################### # + +import struct +from datetime import datetime +import pvporcupine + +class PicovoiceRecorder(VADRecorder): + def __init__(self, + access_key, + keywords=None, + keyword_paths=None, + model_path=None, + sensitivities=0.5, + library_path=None, + **kwargs): + + super().__init__(**kwargs) + + pico_cfg = dict( + access_key=access_key, + keywords=keywords, + 
keyword_paths=keyword_paths, + model_path=model_path, + sensitivities=sensitivities, + library_path=library_path, + ) + + self.pico_detector_init(pico_cfg) + + self.keywords = self.pico_cfg['keywords'] + print(f"PicovoiceRecorder initialized with keywords: {self.keywords}") + + def pico_detector_init(self, pico_cfg): + if pico_cfg['keyword_paths'] is None: + if pico_cfg['keywords'] is None: + raise ValueError(f"Either `--keywords` or `--keyword_paths` must be set. \ + Available keywords: {list(pvporcupine.KEYWORDS)}") + + keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in pico_cfg['keywords']] + else: + keyword_paths = pico_cfg['keyword_paths'] + + if pico_cfg['sensitivities'] is None: + pico_cfg['sensitivities'] = [0.5] * len(keyword_paths) + elif isinstance(pico_cfg['sensitivities'], float): + pico_cfg['sensitivities'] = [pico_cfg['sensitivities']] * len(keyword_paths) + + if len(keyword_paths) != len(pico_cfg['sensitivities']): + raise ValueError('Number of keywords does not match the number of sensitivities.') + + try: + self.porcupine = pvporcupine.create( + access_key=pico_cfg['access_key'], + keywords=pico_cfg['keywords'], + keyword_paths=keyword_paths, + model_path=pico_cfg['model_path'], + sensitivities=pico_cfg['sensitivities'], + library_path=pico_cfg['library_path']) + except pvporcupine.PorcupineInvalidArgumentError as e: + print("One or more arguments provided to Porcupine is invalid: ", pico_cfg.keys()) + print(e) + raise e + except pvporcupine.PorcupineActivationError as e: + print("AccessKey activation error") + raise e + except pvporcupine.PorcupineActivationLimitError as e: + print("AccessKey '%s' has reached it's temporary device limit" % pico_cfg['access_key']) + raise e + except pvporcupine.PorcupineActivationRefusedError as e: + print("AccessKey '%s' refused" % pico_cfg['access_key']) + raise e + except pvporcupine.PorcupineActivationThrottledError as e: + print("AccessKey '%s' has been throttled" % pico_cfg['access_key']) + raise e + except pvporcupine.PorcupineError as e: + print("Failed to initialize Porcupine") + raise e + + self.pico_cfg = pico_cfg + + def is_wakeup(self, data): + pcm = struct.unpack_from("h" * self.porcupine.frame_length, data) + result = self.porcupine.process(pcm) + # print(f"picovoice result: {result}") + if result >= 0: + print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result])) + return True + # self.write_wave(f"output_{time.time()}.wav", [data]) + # print(f"write to: output_{time.time()}.wav") + return False + + + def record_picovoice(self, return_type=None, exception_on_overflow=False, queue=None): + + print("Recording started. 
Press Ctrl+C to stop.") + while True: + data = self.record_chunk_voice( + return_type=None, + CHUNK=self.porcupine.frame_length, + exception_on_overflow=exception_on_overflow, + queue=queue) + + wake_up = self.is_wakeup(data) + if wake_up: + break + return True diff --git a/takway/board/__init__.py b/takway/board/__init__.py new file mode 100644 index 0000000..9f1a159 --- /dev/null +++ b/takway/board/__init__.py @@ -0,0 +1,4 @@ +from .base_hd import BaseHardware +from .keyboard import Keyboard +from .sipeed import V329 +from .orangepi import OrangePi \ No newline at end of file diff --git a/takway/board/base_hd.py b/takway/board/base_hd.py new file mode 100644 index 0000000..385b8b9 --- /dev/null +++ b/takway/board/base_hd.py @@ -0,0 +1,32 @@ +import threading +import time + +class BaseHardware: + def __init__(self, hd_trigger=None, hd_detect_threshold=50): + self.hd_trigger = hd_trigger + self.hd_detect_threshold = hd_detect_threshold + + self.hd_lock = threading.Lock() + self.shared_hd_status = False + + + def init_hd_thread(self): + hd_thread = threading.Thread(target=self.hd_detection_loop) + hd_thread.start() + # hd_thread.join() + print("HD detection thread started.") + + def hd_detection_loop(self): + pass + + @property + def is_hardware_pressed(self): + return self.shared_hd_status + + def wait_for_hardware_pressed(self): + print("Waiting for hardware trigger.") + while True: + if self.is_hardware_pressed: + time.sleep(0.01) + break + return True \ No newline at end of file diff --git a/takway/board/keyboard.py b/takway/board/keyboard.py new file mode 100644 index 0000000..2d4e24d --- /dev/null +++ b/takway/board/keyboard.py @@ -0,0 +1,36 @@ +import keyboard +import time + +from takway.board.base_hd import BaseHardware + +import datetime +# + +class Keyboard(BaseHardware): + def __init__(self, hd_trigger='keyboard', keyboard_key='space', hd_detect_threshold=50): + super().__init__(hd_trigger, hd_detect_threshold) + + self.keyboard_key = keyboard_key + self.init_hd_thread() + self.power_status = False # 单次触发按键状态 + + def hd_detection_loop(self): + keyboard_status = False + last_status = False + while True: + ''' + keyboard_status = keyboard.is_pressed(self.keyboard_key) + with self.hd_lock: + self.shared_hd_status = keyboard_status + ''' + self.shared_hd_status = keyboard.is_pressed(self.keyboard_key) + time.sleep(0.001) + + if not self.shared_hd_status and last_status: + self.power_status = ~self.power_status + if self.power_status: + print("Chating mode.") + else: + print("Slience mode.") + print(f"pres time: {datetime.datetime.now()}") + last_status = self.shared_hd_status \ No newline at end of file diff --git a/takway/board/orangepi.py b/takway/board/orangepi.py new file mode 100644 index 0000000..ebe54aa --- /dev/null +++ b/takway/board/orangepi.py @@ -0,0 +1,94 @@ +from takway.board.base_hd import BaseHardware +import threading +import datetime +try: + import wiringpi + from wiringpi import GPIO +except: + pass + + +''' +| GPIO | LED | +| -- | - - | +| 0 | 红色 | +| 1 | 黄色 | +| 2 | 绿色 | +| 3 | 蓝色 | +| 4 | 白色 | + +| GPIO | BUTTOM | +| -- | ---- | +| 6 | 按键1 | +| 8 | 按键2 | +''' + +class OrangePi(BaseHardware): + def __init__(self, hd_trigger='button', hd_detect_threshold=50): + super().__init__(hd_trigger, hd_detect_threshold) + + self.LED_PIN_red = 0 + self.LED_PIN_yellow = 1 + self.LED_PIN_green = 2 + self.LED_PIN_blue = 3 + self.LED_PIN_white = 4 + + self.BUTTON_PIN_1 = 6 + self.BUTTON_PIN_2 = 8 + + self.button_status_2 = False + self.led_set_status_2 = False + + 
self.power_status = False # 单次触发按键状态 + + self.button_init() + self.init_hd_thread() + + def button_init(self): + wiringpi.wiringPiSetup() + # GPIO 输出模式 + wiringpi.pinMode(self.LED_PIN_red,GPIO.OUTPUT) + wiringpi.pinMode(self.LED_PIN_yellow,GPIO.OUTPUT) + wiringpi.pinMode(self.LED_PIN_green,GPIO.OUTPUT) + wiringpi.pinMode(self.LED_PIN_blue,GPIO.OUTPUT) + wiringpi.pinMode(self.LED_PIN_white,GPIO.OUTPUT) + # GPIO 输入模式 + wiringpi.pinMode(self.BUTTON_PIN_1,GPIO.INPUT) + wiringpi.pinMode(self.BUTTON_PIN_2,GPIO.INPUT) + + def init_hd_thread(self): + # hd_threads = [threading.Thread(target=self.hd_detection_loop), + # threading.Thread(target=self.hd_detection_loop_2)] + hd_threads = [threading.Thread(target=self.hd_detection_loop)] + for hd_thread in hd_threads: + hd_thread.start() + + def hd_detection_loop(self): + keyboard_status = False + last_status = False + while True: + self.button_status = wiringpi.digitalRead(self.BUTTON_PIN_1) + if self.button_status: + wiringpi.digitalWrite(self.LED_PIN_red, GPIO.LOW) + else: + wiringpi.digitalWrite(self.LED_PIN_red,GPIO.HIGH) + + if not self.button_status and last_status: + self.power_status = ~self.power_status + if self.power_status: + print("Chating mode.") + else: + print("Slience mode.") + print(f"pres time: {datetime.datetime.now()}") + last_status = self.button_status + + def set_led_on(self, color='red'): + wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.HIGH) + + def set_led_off(self, color='red'): + wiringpi.digitalWrite(getattr(self, f'LED_PIN_{color}'), GPIO.LOW) + +if __name__ == '__main__': + orangepi = OrangePi() + while True: + pass \ No newline at end of file diff --git a/takway/board/sipeed.py b/takway/board/sipeed.py new file mode 100644 index 0000000..0afb691 --- /dev/null +++ b/takway/board/sipeed.py @@ -0,0 +1,58 @@ +import sys +import warnings +import threading +import time +from collections import deque + +from takway.board.base_hd import BaseHardware + +if "gpiod" in sys.modules: + # sipeed MaixSense V329 + import gpiod as gpio +else: + # 如果所有库都不存在,执行默认操作或抛出异常 + # raise ImportError("gpiod package is not available.") + pass + +class V329(BaseHardware): + def __init__(self, hd_trigger='button', hd_detect_threshold=50): + super().__init__(hd_trigger, hd_detect_threshold) + self.button = self.button_init() + + self.init_hd_thread() + + def button_init(self): + PH_BASE = (8-1)*32 #PH + + gpiochip1 = gpio.chip("gpiochip1") + button = gpiochip1.get_line((PH_BASE+5)) + config = gpio.line_request() + config.request_type = gpio.line_request.DIRECTION_INPUT + config.flags = gpio.line_request.FLAG_BIAS_PULL_UP + button.request(config) + return button + + @property + def button_status(self): + return True if self.button.get_value() == 1 else False + + def hd_detection_loop(self): + self.shared_hd_status = False + button_value_list = deque(maxlen=self.hd_detect_threshold) + + while True: + if len(button_value_list) > button_value_list.maxlen: + button_value_list.popleft() + button_value_list.append(self.button_status) + # 记录50个值,如果连续50个值都是True,则认为按钮被按下 + if button_value_list.count(True) == button_value_list.maxlen: + with self.hd_lock: + self.shared_hd_status = True + # 记录50个值,如果连续50个值都是False,则认为按钮被松开 + if button_value_list.count(False) == button_value_list.maxlen: + with self.hd_lock: + self.shared_hd_status = False + + + + diff --git a/takway/cam_utils.py b/takway/cam_utils.py new file mode 100644 index 0000000..5c51ce1 --- /dev/null +++ b/takway/cam_utils.py @@ -0,0 +1,11 @@ +try: + import cv2 +except: + 
warnings.warn("OpenCV is not installed, please check the module if you need.") + +class Camera: + def __init__(self, + device='pc', + width=1280, + height=720): + pass \ No newline at end of file diff --git a/takway/clients/__init__.py b/takway/clients/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/takway/clients/client_utils.py b/takway/clients/client_utils.py new file mode 100644 index 0000000..d795925 --- /dev/null +++ b/takway/clients/client_utils.py @@ -0,0 +1,160 @@ +import os +import json +import time +import datetime +import requests + +from takway.common_utils import encode_bytes2str, decode_str2bytes + +''' +{ + "RESPONSE_INFO": { + "status": "success/error", # string + "message": "xxxxx", # string + }, + "DATA": { + "Audio": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "rate": ; # int + "channels": ; # int + "format": ; # int + } + }, + "Text": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "is_end": True/False, # bool + } + }, + "Image": { + "data": "xxxxx", # base64 encoded data + "metadata": { + "width": ; # int + "height": ; # int + "format": ; # string + } + } + } +} +''' + +class Client: + def __init__(self, server_url): + self.server_url = server_url + + def gen_request_data(self, **kwargs): + # print("kwargs:", kwargs) + audio_data = kwargs.get("audio_data", None) + text_data = kwargs.get("text_data", dict()) + return json.dumps( + { + "is_end": audio_data.get("is_end"), # bool + "is_bgn": audio_data.get("is_bgn"), # bool + "DATA": { + "Audio": { + "data": encode_bytes2str(audio_data['frames']), # base64 encoded data + "metadata": { + "frames_size": audio_data.get("frames_size"), # string + "chunk_size": audio_data.get("chunk_size"), # int + "is_end": audio_data.get("is_end"), # bool + } + }, + "Text": { + "data": text_data.get("text"), # base64 encoded data + "metadata": { + "chat_status": text_data.get("chat_status"), # string + "chat_history": text_data.get("chat_history"), # list of dict + } + }, + }, + "META_INFO": { + # "model_version": kwargs.get("model_version", ""), # string + # "model_url": kwargs.get("model_url", ""), # string + "character": { + "name": kwargs.get("character", "Klee"), # string + "speaker_id": kwargs.get("speaker_id", 113), # int + "wakeup_words": kwargs.get("wakeup_words", "可莉来啦"), # list of string + } + } + } + ) + '\n' + + def send_data_to_server(self, **kwargs): + return requests.post(self.server_url, + data=self.gen_request_data(**kwargs), stream=True) + + + + +# ############################################ # +# ############ WebSocket Client ############# # +def check_audio_type(data, return_type='base64'): + ''' + Check if the data type is valid. + ''' + assert return_type in ['bytes', 'base64'] + if return_type == 'base64': + if isinstance(data, bytes): + return encode_bytes2str(data) + elif return_type == 'bytes': + if isinstance(data, str): + return decode_str2bytes(data) + else: + raise ValueError('Invalid data type: {}.'.format(type(data))) + +import websocket +from websocket import create_connection + +class BaseWebSocketClient: + def __init__(self, server_url, session_id): + self.server_url = server_url + self.session_id = session_id + + def wakeup_client(self): + ''' + Start the client. + ''' + self.websocket = create_connection(self.server_url) + + def send_per_data(self, + text: str = '', + audio: bytes = b'', + stream: bool = True, + voice_synthesize: bool = False, + is_end: bool = False, + encoding: str = 'base64', + ): + ''' + Send data to server. 
+ + Args: + data: bytes, data to be sent to server. + ''' + + self.websocket.send(json.dumps({ + "text": text, + "audio": check_audio_type(audio, return_type=encoding), + "meta_info": { + "session_id": self.session_id, + "stream": stream, + "voice_synthesize": voice_synthesize, + "is_end": is_end, + "encoding": encoding, + }})) + + def receive_per_data(self): + try: + recv_data = self.websocket.recv() + except websocket._exceptions.WebSocketConnectionClosedException: + return None, None + try: + recv_data = json.loads(recv_data) + except json.JSONDecodeError as e: + # print(f"JSONDecodeError: {e}") + # is_end = True + pass + except Exception as e: + # print(f"receive_per_data error: {e}") + assert isinstance(recv_data, bytes), ValueError(f"Received data is not bytes, got {type(recv_data)}.") + return recv_data, type(recv_data) \ No newline at end of file diff --git a/takway/clients/local_client_utils.py b/takway/clients/local_client_utils.py new file mode 100644 index 0000000..85d3a6f --- /dev/null +++ b/takway/clients/local_client_utils.py @@ -0,0 +1,513 @@ +# basic +import io +import time +import json +import random +from collections import deque +# log +import logging +import warnings +# multiprocessing +import queue +import threading +import multiprocessing +# web request +import requests +import pyaudio +# hot words detection +import pvporcupine + +from takway.apps.data_struct import QueueIterator +from takway.common_utils import * +from takway.audio_utils import PicovoiceRecorder +from takway.clients.client_utils import CharacterClient +from takway.audio_utils import AudioPlayer +from takway.emo_utils import EmoVideoPlayer + + +class LocalClinet: + def __init__(self, + server_args, + recorder_args, + video_args, + emo_args, + log_args): + # server_args + self.server_args = server_args + # recorder_args + self.recorder_args = recorder_args + # video_args + self.video_args = video_args + # emo_args + self.emo_args = emo_args + # log_args + self.log_args = log_args + + # TODO: 设计多进程log queue + self.logger_init() + + + def logger_init(self): + # log_args + log_level = self.log_args['log_level'] + log_file = self.log_args['log_file'] + + if log_level == 'debug': + log_level = logging.DEBUG + elif log_level == 'info': + log_level = logging.INFO + + # logger + self.logger = logging.getLogger('mylogger') + self.logger.setLevel(log_level) + # handler 创建一个handler,用于写入日志文件 + handler = logging.FileHandler(log_file) + handler.setLevel(log_level) + # stream handler 创建一个handler,用于输出到控制台 + console = logging.StreamHandler() + console.setLevel(logging.INFO) + + # 定义handler的输出格式(formatter) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + console.setFormatter(formatter) + + # 添加handler + self.logger.addHandler(handler) + self.logger.addHandler(console) + self.logger.info("Logger started.") + + def process_init(self): + # multiprocessing + manager = multiprocessing.Manager() + self.trigger_queue = manager.Queue() + self.client_queue = manager.Queue() + self.audio_play_queue = manager.Queue() + self.emo_display_queue = manager.Queue() + + self.share_time_dict = manager.dict() + + self.speaking_emo_event = manager.Event() + + processes = [ + multiprocessing.Process(target=self.audio_process, + args=(self.trigger_queue,self.client_queue)), + # multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue,self.client_queue)), + multiprocessing.Process(target=self.local_client_process, + 
args=(self.client_queue,self.audio_play_queue,self.emo_display_queue, self.share_time_dict)), + multiprocessing.Process(target=self.audio_play_process, + args=(self.audio_play_queue,self.share_time_dict)), + ] + if self.emo_args.pop('enable'): + processes.append( + multiprocessing.Process(target=self.emo_display_process, args=(self.emo_display_queue,)), + ) + + for process in processes: + process.start() + for process in processes: + process.join() + + def audio_process(self, + trigger_queue, + client_queue): + """audio_process + + Args: + trigger_queue: multiprocessing.Queue, trigger queue + client_queue: multiprocessing.Queue, client queue + """ + + self.frame_chunk_size = self.recorder_args.pop('frame_chunk_size') + self.min_stream_record_time = self.recorder_args.pop('min_stream_record_time') + voice_trigger = self.recorder_args.pop('voice_trigger') + self.RATE = self.recorder_args['RATE'] + + recorder = PicovoiceRecorder(**self.recorder_args) + + # shared data struct: + self.shared_waiting = False + self.shared_lock = threading.Lock() + self.shared_data_lock = threading.Lock() + + # create threads + threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))] + if voice_trigger: + vioce_threads = [ + threading.Thread(target=self.voice_trigger_thread, args=(recorder,)), + ] + threads.extend(vioce_threads) + for thread in threads: + thread.start() + self.logger.info("Audio Process started.") + + while True: + for thread in threads: + thread.join() + print(f"audio process exit") ; exit() + + + def hardware_trigger_thread(self, recorder): + """hardware_trigger_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + self.logger.info("Hardware trigger thread started.") + + trgrigger_status = False + while True: + if self.shared_waiting: + continue + + # init status buffer + is_bgn = True + _frames = 0 + _total_frames = 0 + frames = [] + full_frames = [] + + print("Waiting for button press...") + recorder.wait_for_hardware_pressed() + print("Button pressed.") + # stop voice trigger thread + with self.shared_data_lock: + self.shared_waiting = True # shared_waiting 控制所有线程的待机状态,True表示待机,False表示工作 + + print("Start recording...") + bg_t = time.time() + record_chunk_size = recorder.hd_chunk_size + while True: + + data = recorder.record_chunk_voice( + CHUNK=record_chunk_size, + return_type=None, + exception_on_overflow=False) + + frames.append(data) + full_frames.append(data) + _total_frames += 1 + + if not recorder.is_hardware_pressed: + break + + stream_reset_status = self.stream_record( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=False) + if stream_reset_status: + full_frames.clear() + is_bgn = False + + self.stream_record( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=True) + + print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.") + # recorder.write_wave_io(f"record_{int(bg_t)}.wav", frames); print(f"write record_{int(bg_t)}.wav") + + with self.shared_data_lock: + self.shared_waiting = False # 恢复voice trigger线程工作 + + def voice_trigger_thread(self, recorder): + """voice_trigger_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + self.logger.info("voice record thread started.") + + print("Waiting for wake up...") + while True: + if self.shared_waiting: 
+ continue + data = recorder.record_chunk_voice( + CHUNK=recorder.porcupine.frame_length, + return_type=None, + exception_on_overflow=False, + queue=None) + + record_chunk_size = recorder.vad_chunk_size + + if not recorder.is_wakeup(data): + continue + # wake up + is_bgn = True + _frames = 0 + _total_frames = 0 + frames = [] + full_frames = [] + # status buffer + buffer_size = recorder.vad_buffer_size + active_buffer = deque(maxlen=buffer_size) + bg_t = time.time() + print("Start recording...") + while True: + data = recorder.record_chunk_voice( + CHUNK=record_chunk_size, + return_type=None, + exception_on_overflow=False) + if data is None: + continue + + is_speech = recorder.is_speech(data) + if is_speech: + _frames += 1 + frames.append(data) + print("add vad frame") + _total_frames += 1 + full_frames.append(data) + + # send chunk data to client + stream_reset_status = self.stream_record( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=False) + if stream_reset_status: + full_frames.clear() + is_bgn = False + + if is_speech: + if active_buffer.__len__() == buffer_size: + active_buffer.popleft() + active_buffer.append(True) + else: + if active_buffer.__len__() == buffer_size: + active_buffer.popleft() + active_buffer.append(False) + if active_buffer.count(False) != active_buffer.maxlen: + continue + if time.time() - bg_t > recorder.min_act_time: + # end recording + self.stream_record( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=True) + print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid rate: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.") + print("End recording.") + break + + + + def stream_record(self, + bytes_frames: bytes, + frames_size: int, + record_chunk_size: int, + is_bgn: bool, + is_end: bool): + ''' + Args: + bytes_frames: bytes, audio data + frames_size: int, audio data size + record_chunk_size: int, audio data chunk size + is_bgn: bool, is begin of stream + is_end: bool, is end of stream + + Returns: + bool, if stream reset status + ''' + if len(bytes_frames) == 0: + return False + if frames_size*record_chunk_size >= self.min_stream_record_time*self.RATE or is_end: + if is_bgn and is_end: + return False + stream_data = dict( + frames=bytes_frames, + frames_size=frames_size, + chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=is_end) + self.client_queue.put(('audio', stream_data)) + if is_end: + print("put None to client queue.") + self.client_queue.put(None) + return True + else: + return False + + def camera_process(self, logger, trigger_queue, client_queue): + from takway.cam_utils import Camera + cam = Camera(self.video_args) + while True: + if trigger_queue.empty(): + time.sleep(0.5) + else: + item = trigger_queue.get() + if item[0] == 'trgrigger_status' and item[1]: + _, frame = cap.read() + client_queue.put(('image', frame)) + + + def local_client_process(self, + client_queue, + audio_play_queue, + emo_display_queue, + share_time_dict): + ''' + Args: + client_queue: multiprocessing.Queue, client queue + audio_play_queue: multiprocessing.Queue, audio play queue + emo_display_queue: multiprocessing.Queue, emo display queue + share_time_dict: multiprocessing.Manager.dict, shared time dict + ''' + character = self.server_args.pop('character') + client = 
CharacterClient(**self.server_args) + + # print(f"-------------------{character}-------------------") + # print(f"client.chat_status: {client.chat_status}") + if client.chat_status == 'init': + client.set_character(character) + client.chat_status = 'chating' + # print(f"client.chat_history: {client.chat_history}") + + self.logger.info("Local client process started.") + + while True: + response = None + if self.client_queue.empty(): + continue + try: + response = requests.post(client.server_url, stream=True, + data=self.generate_stream_queue_data(client, client_queue)) + print("get response from server.") + self.get_stream_response(client, response, audio_play_queue, emo_display_queue) + except requests.exceptions.ConnectionError or ConnectionError as e: + print(f"Wait for Server connection...") + except requests.exceptions.Timeout or Timeout as e: + print(f"Timeout: {e}") + except requests.exceptions.ChunkedEncodingError: + print("ChunkedEncodingError") + + def generate_stream_queue_data(self, client, client_queue, **kwargs): + _i = 0 + for queue_data in QueueIterator(client_queue): + if queue_data[0] == 'audio': + _i += 1 + if _i == 1: + self.share_time_dict['client_time'] = [time.time()] + else: + self.share_time_dict['client_time'].append(time.time()) + audio_data = queue_data[1] + print("send audio data to server...") + # print(f"local chat history: {client.chat_history}") + yield client.gen_request_data( + audio_data=audio_data, + chat_data=dict( + chat_history=client.chat_history, + chat_status=client.chat_status), + character_data=client.character_info) + + def get_stream_response(self, + client, + response, + audio_play_queue=None, + emo_display_queue=None, + chunk_size=1024): + ''' + Args: + client: takway.client_utils.CharacterClient, client object + response: requests.Response, response object + audio_play_queue: multiprocessing.Queue, audio play queue + emo_display_queue: multiprocessing.Queue, emo display queue + chunk_size: int, chunk size + ''' + assert isinstance(response, requests.Response), \ + f"response is not requests.Response, but {type(response)}" + + temp_data = '' # init temp_data + + if response.status_code == 200: + print("get response from server successfully.") + else: + print(f"response error, status code: {response.status_code}") + + chat_llm_response = '' + + _i = 0 + # for chunk in response.iter_lines(): + # if chunk: + for chunk in response.iter_content(chunk_size=chunk_size): + temp_data += chunk.decode('utf-8') + if temp_data.endswith('\n'): + _i += 1 + try: + temp_json = json.loads(temp_data.rstrip('\n')) + # phase 1: get audio data + audio_play_queue.put(('server_data', temp_json['audio_output']['tts_stream_data'])) + # phase 2: get chat data + chat_llm_response += temp_json['chat_output']['llm_stream_data'] + + if temp_json['is_end']: + client.update_chat_history(question=temp_json['chat_output']['question'], + response=chat_llm_response, asw_prompt_id=1) + # print(f"chat_history: {client.chat_history}") + if _i == 1: + emo_display_queue.put(('emo_data', '高兴')) + except json.JSONDecodeError: + print(f"json decode error: {temp_data}") + temp_data = '' + # print("get response.") + print("End get response.") + + def audio_play_process(self, + audio_play_queue, + share_time_dict): + ''' + Args: + audio_play_queue: multiprocessing.Queue, audio play queue + share_time_dict: multiprocessing.Manager.dict, shared time dict + ''' + audio_player = AudioPlayer() + self.logger.info("Audio play process started.") + while True: + self.speaking_emo_event.clear() + 
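+            # while blocking here for the next TTS chunk, the cleared event lets
+            # emo_display_process resume random blinking; set() below pauses it again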
item = audio_play_queue.get() + self.speaking_emo_event.set() # stop emo random display + if item[0] == 'server_data': + # 播放音频 + print("Playing audio...") + tts_audio = item[1] + print(f"wait time: {(time.time() - self.share_time_dict['client_time'][0])*1000:.2f} ms") + try: + audio_player.play(tts_audio) + except TypeError as e: + # print(f"audio play error: {e}") + # print(f"tts_audio: {tts_audio}") + # print(f"type tts_audio: {type(tts_audio)}") + # tts_audio: + continue + + + def emo_display_process(self, emo_display_queue): + ''' + Args: + emo_display_queue: multiprocessing.Queue, emo display queue + ''' + emo_player = EmoVideoPlayer(**self.emo_args) + self.logger.info("Emo display process started.") + while True: + if emo_display_queue.empty(): + time.sleep(0.1) + if self.speaking_emo_event.is_set(): + continue + emo_player.random_wink() + else: + item = emo_display_queue.get() + print(f"Emo display process Get item: {item[0]}") + if item[0] == 'emo_data': + server_data = item[1] + print("Displaying emo...") + emo_player.display_emo(emo_name='兴奋', stage='start') + emo_player.display_emo(emo_name='兴奋', stage='loop') + emo_player.display_emo(emo_name='兴奋', stage='end') + print("Display done.") + time.sleep(15) diff --git a/takway/clients/web_socket_client_utils.py b/takway/clients/web_socket_client_utils.py new file mode 100644 index 0000000..c25e7fe --- /dev/null +++ b/takway/clients/web_socket_client_utils.py @@ -0,0 +1,330 @@ +# basic +import io +import os +import sys +import time +import json +import random +from collections import deque +from datetime import datetime +# log +import logging +import warnings +# multiprocessing +import queue +import threading +import multiprocessing +# web request +import requests +import pyaudio +# hot words detection +import pvporcupine + +from takway.apps.data_struct import QueueIterator +from takway.common_utils import * +from takway.audio_utils import PicovoiceRecorder, HDRecorder +from takway.clients.client_utils import BaseWebSocketClient +from takway.audio_utils import AudioPlayer + + +class WebSocketClinet: + def __init__(self, + board, + server_args, + recorder_args, + player_args, + log_args, + excute_args=None, + ): + self.board = board + # server_args + self.server_args = server_args + # recorder_args + self.recorder_args = recorder_args + # player_args + self.player_args = player_args + # excute_args + self.excute_args = excute_args + # log_args + self.log_args = log_args + + + def process_init(self): + # multiprocessing + manager = multiprocessing.Manager() + self.trigger_queue = manager.Queue() + self.client_queue = manager.Queue() + self.audio_play_queue = manager.Queue() + self.excute_queue = manager.Queue() + + # 多进程标志为 + self.mircophone_active_set = manager.Event() + self.speaker_active_set = manager.Event() + + processes = [ + multiprocessing.Process(target=self.audio_process), + multiprocessing.Process(target=self.web_socket_client_process), + multiprocessing.Process(target=self.audio_play_process), + ] + if self.excute_args.get('enable', False): + processes.append( + multiprocessing.Process(target=self.excute_process), + ) + + for process in processes: + time.sleep(0.5) + process.start() + for process in processes: + process.join() + + def audio_process(self): + """audio_process + + Args: + trigger_queue: multiprocessing.Queue, trigger queue + client_queue: multiprocessing.Queue, client queue + """ + min_stream_record_time = self.recorder_args.pop('min_stream_record_time') + voice_trigger = 
self.recorder_args.pop('voice_trigger') + # TODO: + press_type = self.recorder_args.pop('press_type') + max_slience_time = self.recorder_args.pop('max_slience_time') + if voice_trigger: + recorder = PicovoiceRecorder(**self.recorder_args) + else: + voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path'] + for key in voice_keys: + self.recorder_args.pop(key) + recorder = HDRecorder(**self.recorder_args) + recorder.min_stream_record_time = min_stream_record_time + # TODO: + recorder.press_type = press_type + recorder.max_slience_time = max_slience_time + + print("Audio Process started.") + + print("Waiting for wake up...") + # recorder.hardware.set_led_on("green") + while True: + if self.shared_waiting: + continue + data = recorder.record_chunk_voice( + CHUNK=recorder.porcupine.frame_length, + return_type=None, + exception_on_overflow=False, + queue=None) + + record_chunk_size = recorder.vad_chunk_size + + # 开关按键被按下或被关键词唤醒 + if recorder.hardware.power_status or recorder.is_wakeup(data): + # recorder.hardware.set_led_on("blue") + pass + else: + continue + # wake up + is_bgn = True + is_end = False + frames = [] + # status buffer + slience_bgn_t = time.time() + slience_time = 0 + print("Start recording...") + # 准备对话状态 + while True: + # 语音活动检测 + data = recorder.record_chunk_voice( + CHUNK=record_chunk_size, + return_type=None, + exception_on_overflow=False) + is_speech = recorder.is_speech(data) + + # 判断状态 + if is_speech: + print("valid voice") + slience_bgn_t = time.time() + frames.append(data) + else: + slience_time = time.time() - slience_bgn_t + + # 长时沉默关闭唤醒状态:如果唤醒后超过一定时间没有说话/关闭按键被按下,则认为是结束 + if slience_time > recorder.max_slience_time or not recorder.hardware.power_status: + break + + # 短时沉默结束单次对话:沉默时间超过一定时间段(0.5s左右),则发送数据 + if slience_time > recorder.min_act_time: + is_end = True + is_bgn = False + + if not is_speech: + continue + + # 流式发送数据 + stream_reset_status = self.stream_record_process( + bytes_frames=recorder.write_wave_bytes(frames), + frames_size=len(frames), + record_chunk_size=record_chunk_size, + sample_rate=recorder.RATE, + min_stream_record_time=recorder.min_stream_record_time, + is_bgn=is_bgn, + is_end=is_end) + + if stream_reset_status: + frames.clear() + is_bgn = False + + # print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: {_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.") + # print("End recording.") + # recorder.hardware.set_led_off("blue") + + + def stream_record_process(self, + bytes_frames: bytes, + frames_size: int, + record_chunk_size: int, + sample_rate: int, + min_stream_record_time: int, + is_bgn: bool, + is_end: bool): + ''' + Args: + bytes_frames: bytes, audio data + frames_size: int, audio data size + record_chunk_size: int, audio data chunk size + is_bgn: bool, is begin of stream + is_end: bool, is end of stream + + Returns: + bool, if stream reset status + ''' + if len(bytes_frames) == 0: + return False + if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end: + if is_bgn and is_end: + return False + stream_data = dict( + frames=bytes_frames, + frames_size=frames_size, + chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=is_end) + self.client_queue.put(('audio', stream_data)) + if is_end: + # print("put None to client queue.") + self.client_queue.put(None) + return True + else: + return False + + def web_socket_client_process(self): + + client = 
BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id']) + print("Web socket client process started.") + # print("Web socket client process started.") + + while True: + if self.client_queue.empty(): + continue + + # print(f"init skt time: {datetime.now()}") + # 唤醒 + client.wakeup_client() + + # 发送数据 + for queue_data in QueueIterator(self.client_queue): + if queue_data[0] == 'audio': + audio_dict = queue_data[1] + + client.send_per_data( + audio=audio_dict['frames'], + stream=True, + voice_synthesize=True, + is_end=audio_dict['is_end'], + encoding='base64', + ) + # print(f"send skt time: {datetime.now()}") + # print(f"fnsh skt time: {datetime.now()}") + + # 接收数据 + while True: + response, data_type = client.receive_per_data() + if data_type == dict: + print(response) # 打印接收到的消息 + ''' + try: + response = json.loads(response['msg']) + if 'content' in response.keys(): + self.excute_queue.put((response['instruct'], response['content'])) + except json.JSONDecodeError as e: + print(f"json decode error: {e}") + continue + # print(f"recv json time: {datetime.now()}") + ''' + elif data_type == bytes: + # print(f"recv bytes time: {datetime.now()}") + self.audio_play_queue.put(('audio_bytes', response)) + elif data_type == None: + break # 如果没有接收到消息,则退出循环 + # print("接收完毕:", datetime.now()) + + + def audio_play_process(self): + ''' + Args: + audio_play_queue: multiprocessing.Queue, audio play queue + share_time_dict: multiprocessing.Manager.dict, shared time dict + ''' + audio_player = AudioPlayer(**self.player_args) + print("Audio play process started.") + while True: + item = self.audio_play_queue.get() + # 播放音频 + print("Playing audio...") + tts_audio = item[1] + print(f"tts_audio len: {len(tts_audio)}") + print(f"play audio time: {datetime.now()}") + try: + # 播放 + self.speaker_active_set.set() + tts_audio = audio_player.check_audio_type(tts_audio, return_type=None) + for i in range(0, len(tts_audio), audio_player.CHUNK): + audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK]) + # print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio))) + if self.mircophone_active_set.is_set(): + print("mirophone is active.") + self.mircophone_active_set.wait() + break + audio_player.stream.write(tts_audio[i+audio_player.CHUNK:]) + print(f"audio data played.") + except TypeError as e: + print(f"audio play error: {e}") + continue + + # audio_player.stream.write(audio_data[i+audio_player.CHUNK:]) + # print(f"{item[0]} data played.") + + + + + def excute_process(self): + ''' + Args: + excute_queue: multiprocessing.Queue, excute display queue + ''' + print("Excute process started.") + + while True: + if self.excute_queue.empty(): + continue + + if self.speaker_active_set.is_set(): + instruct, content = self.excute_queue.get() + + print(f"Got speaker info: {instruct, content}") + + print(f"Playing {instruct} {content}...") + print(f"play {instruct} time: {datetime.now()}") + self.audio_play_queue.put((instruct, content)) + + self.speaker_active_set.clear() + \ No newline at end of file diff --git a/takway/clients/web_socket_client_utils_v1.py b/takway/clients/web_socket_client_utils_v1.py new file mode 100644 index 0000000..d9f2788 --- /dev/null +++ b/takway/clients/web_socket_client_utils_v1.py @@ -0,0 +1,464 @@ +# basic +import io +import os +import sys +import time +import json +import random +from collections import deque +from datetime import datetime +# log +import logging +import warnings +# multiprocessing +import queue +import threading +import multiprocessing +# web 
request +import requests +import pyaudio +# hot words detection +import pvporcupine + +from takway.apps.data_struct import QueueIterator +from takway.common_utils import * +from takway.audio_utils import PicovoiceRecorder, HDRecorder +from takway.clients.client_utils import BaseWebSocketClient +from takway.audio_utils import AudioPlayer + + +class WebSocketClinet: + def __init__(self, + board, + server_args, + recorder_args, + player_args, + log_args, + excute_args=None, + ): + self.board = board + # server_args + self.server_args = server_args + # recorder_args + self.recorder_args = recorder_args + # player_args + self.player_args = player_args + # excute_args + self.excute_args = excute_args + # log_args + self.log_args = log_args + + + def process_init(self): + # multiprocessing + manager = multiprocessing.Manager() + self.trigger_queue = manager.Queue() + self.client_queue = manager.Queue() + self.audio_play_queue = manager.Queue() + self.excute_queue = manager.Queue() + + # 多进程标志为 + self.mircophone_active_set = manager.Event() + self.speaker_active_set = manager.Event() + + processes = [ + multiprocessing.Process(target=self.audio_process), + multiprocessing.Process(target=self.web_socket_client_process), + multiprocessing.Process(target=self.audio_play_process), + ] + if self.excute_args.get('enable', False): + processes.append( + multiprocessing.Process(target=self.excute_process), + ) + + for process in processes: + time.sleep(0.5) + process.start() + for process in processes: + process.join() + + def audio_process(self): + """audio_process + + Args: + trigger_queue: multiprocessing.Queue, trigger queue + client_queue: multiprocessing.Queue, client queue + """ + min_stream_record_time = self.recorder_args.pop('min_stream_record_time') + voice_trigger = self.recorder_args.pop('voice_trigger') + press_type = self.recorder_args.pop('press_type') + if voice_trigger: + recorder = PicovoiceRecorder(**self.recorder_args) + else: + voice_keys = ['access_key', 'keywords', 'keyword_paths', 'model_path','sensitivities', 'library_path'] + for key in voice_keys: + self.recorder_args.pop(key) + recorder = HDRecorder(**self.recorder_args) + recorder.min_stream_record_time = min_stream_record_time + recorder.press_type = press_type + + # shared data struct: + self.shared_waiting = False + self.shared_lock = threading.Lock() + self.shared_data_lock = threading.Lock() + + # create threads + threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))] + if voice_trigger: + vioce_threads = [ + threading.Thread(target=self.voice_trigger_thread, args=(recorder,)), + ] + threads.extend(vioce_threads) + for thread in threads: + thread.start() + print("Audio Process started.") + + while True: + for thread in threads: + thread.join() + print(f"audio process exit") ; exit() + + + def hardware_trigger_thread(self, recorder): + """hardware_trigger_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + print("Hardware trigger thread started.") + + trgrigger_status = False + record_chunk_size = recorder.hd_chunk_size + + while True: + if self.shared_waiting: + continue + + # init status buffer + is_bgn = True + frames = [] + _total_frames = 0 + + self.mircophone_active_set.clear() + print("Waiting for button press...") + recorder.wait_for_hardware_pressed() + print("Button pressed.") + self.mircophone_active_set.set() + # stop voice trigger thread + with self.shared_data_lock: + self.shared_waiting = True # shared_waiting 控制所有线程的待机状态,True表示待机,False表示工作 + + 
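+            # shared_waiting stays True for the rest of this iteration, parking
+            # voice_trigger_thread while the button-driven recording below owns the
+            # microphone; it is reset to False after the recording loop finishes.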
print("Start recording...") + bg_t = time.time() + while True: + + data = recorder.record_chunk_voice( + CHUNK=record_chunk_size, + return_type=None, + exception_on_overflow=False) + + frames.append(data) + _total_frames += 1 + + if not recorder.is_hardware_pressed: + # print("Button released.") + print(f"button rlse time: {datetime.now()}") + break + + stream_reset_status = self.stream_record_process( + bytes_frames=recorder.write_wave_bytes(frames), + frames_size=len(frames), + record_chunk_size=record_chunk_size, + sample_rate=recorder.RATE, + min_stream_record_time=recorder.min_stream_record_time, + is_bgn=is_bgn, + is_end=False) + if stream_reset_status: + frames.clear() + is_bgn = False + + self.stream_record_process( + bytes_frames=recorder.write_wave_bytes(frames), + frames_size=len(frames), + record_chunk_size=record_chunk_size, + sample_rate=recorder.RATE, + min_stream_record_time=recorder.min_stream_record_time, + is_bgn=is_bgn, + is_end=True) + + # print(f"Tatal frames: {_total_frames*record_chunk_size}, {_total_frames*record_chunk_size/recorder.RATE} sec.") + # print(f"rcrd time: {datetime.now()}") + + with self.shared_data_lock: + self.shared_waiting = False # 恢复voice trigger线程工作 + + def voice_trigger_thread(self, recorder): + """voice_trigger_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + print("voice record thread started.") + + print("Waiting for wake up...") + while True: + if self.shared_waiting: + continue + data = recorder.record_chunk_voice( + CHUNK=recorder.porcupine.frame_length, + return_type=None, + exception_on_overflow=False, + queue=None) + + record_chunk_size = recorder.vad_chunk_size + + self.mircophone_active_set.clear() + if not recorder.is_wakeup(data): + continue + + if self.board == 'orangepi': + recorder.hardware.set_led2_on() + self.mircophone_active_set.set() + # wake up + is_bgn = True + _frames = 0 + _total_frames = 0 + frames = [] + full_frames = [] + # status buffer + buffer_size = recorder.vad_buffer_size + active_buffer = deque(maxlen=buffer_size) + bg_t = time.time() + print("Start recording...") + while True: + data = recorder.record_chunk_voice( + CHUNK=record_chunk_size, + return_type=None, + exception_on_overflow=False) + if data is None: + continue + + is_speech = recorder.is_speech(data) + if is_speech: + _frames += 1 + frames.append(data) + # print("add vad frame") + _total_frames += 1 + full_frames.append(data) + + # send chunk data to client + stream_reset_status = self.stream_record_process( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + sample_rate=recorder.RATE, + min_stream_record_time=recorder.min_stream_record_time, + is_bgn=is_bgn, + is_end=False) + if stream_reset_status: + full_frames.clear() + is_bgn = False + + if is_speech: + if active_buffer.__len__() == buffer_size: + active_buffer.popleft() + active_buffer.append(True) + else: + if active_buffer.__len__() == buffer_size: + active_buffer.popleft() + active_buffer.append(False) + if active_buffer.count(False) != active_buffer.maxlen: + continue + if time.time() - bg_t > recorder.min_act_time: + # end recording + self.stream_record_process( + bytes_frames=recorder.write_wave_bytes(full_frames), + frames_size=len(full_frames), + record_chunk_size=record_chunk_size, + sample_rate=recorder.RATE, + min_stream_record_time=recorder.min_stream_record_time, + is_bgn=is_bgn, + is_end=True) + # print(f"Tatal frames: {_total_frames*record_chunk_size}, valid frame: 
{_frames*record_chunk_size}, valid RATE: {_frames/_total_frames*100:.2f}%, {_frames*record_chunk_size/recorder.RATE} sec.") + # print("End recording.") + break + if self.board == 'orangepi': + recorder.hardware.set_led2_off() + + + def stream_record_process(self, + bytes_frames: bytes, + frames_size: int, + record_chunk_size: int, + sample_rate: int, + min_stream_record_time: int, + is_bgn: bool, + is_end: bool): + ''' + Args: + bytes_frames: bytes, audio data + frames_size: int, audio data size + record_chunk_size: int, audio data chunk size + is_bgn: bool, is begin of stream + is_end: bool, is end of stream + + Returns: + bool, if stream reset status + ''' + if len(bytes_frames) == 0: + return False + if frames_size*record_chunk_size >= min_stream_record_time*sample_rate or is_end: + if is_bgn and is_end: + return False + stream_data = dict( + frames=bytes_frames, + frames_size=frames_size, + chunk_size=record_chunk_size, + is_bgn=is_bgn, + is_end=is_end) + self.client_queue.put(('audio', stream_data)) + if is_end: + # print("put None to client queue.") + self.client_queue.put(None) + return True + else: + return False + + def web_socket_client_process(self): + + client = BaseWebSocketClient(self.server_args['server_url'], self.server_args['session_id']) + print("Web socket client process started.") + # print("Web socket client process started.") + + while True: + if self.client_queue.empty(): + continue + + # print(f"init skt time: {datetime.now()}") + # 唤醒 + client.wakeup_client() + + # 发送数据 + for queue_data in QueueIterator(self.client_queue): + if queue_data[0] == 'audio': + audio_dict = queue_data[1] + + client.send_per_data( + audio=audio_dict['frames'], + stream=True, + voice_synthesize=True, + is_end=audio_dict['is_end'], + encoding='base64', + ) + # print(f"send skt time: {datetime.now()}") + # print(f"fnsh skt time: {datetime.now()}") + + # 接收数据 + while True: + response, data_type = client.receive_per_data() + if data_type == dict: + print(response) # 打印接收到的消息 + ''' + try: + response = json.loads(response['msg']) + if 'content' in response.keys(): + self.excute_queue.put((response['instruct'], response['content'])) + except json.JSONDecodeError as e: + print(f"json decode error: {e}") + continue + # print(f"recv json time: {datetime.now()}") + ''' + elif data_type == bytes: + # print(f"recv bytes time: {datetime.now()}") + self.audio_play_queue.put(('audio_bytes', response)) + elif data_type == None: + break # 如果没有接收到消息,则退出循环 + # print("接收完毕:", datetime.now()) + + def audio_play_process(self): + ''' + Args: + audio_play_queue: multiprocessing.Queue, audio play queue + share_time_dict: multiprocessing.Manager.dict, shared time dict + ''' + audio_player = AudioPlayer(**self.player_args) + print("Audio play process started.") + while True: + item = self.audio_play_queue.get() + if item[0] == 'audio_bytes': + # 播放音频 + print("Playing audio...") + tts_audio = item[1] + print(f"tts_audio len: {len(tts_audio)}") + print(f"play audio time: {datetime.now()}") + try: + # 播放 + self.speaker_active_set.set() + tts_audio = audio_player.check_audio_type(tts_audio, return_type=None) + for i in range(0, len(tts_audio), audio_player.CHUNK): + audio_player.stream.write(tts_audio[i:i+audio_player.CHUNK]) + print("Playing {} data...{}/{}".format(item[0], i, len(tts_audio))) + if self.mircophone_active_set.is_set(): + print("mirophone is active.") + self.mircophone_active_set.wait() + break + + audio_player.stream.write(tts_audio[i+audio_player.CHUNK:]) + + # svae bytes to file, 追加写 + with 
open("chat_audio.txt", 'ab') as f: + f.write(tts_audio) + print("Audio saved.") + print(f"audio data played.") + except TypeError as e: + print(f"audio play error: {e}") + continue + else: + if item[0] == 'story': + audio_data = audio_player.load_audio_file(f"/home/orangepi/story_22050/{item[1]}.wav") + elif item[0] == 'music': + audio_data = audio_player.load_audio_file("/home/orangepi/music_22050/1.wav") + # 播放 + self.speaker_active_set.set() + audio_data = audio_player.check_audio_type(audio_data, return_type=None) + time.sleep(0.5) + for i in range(0, len(audio_data), audio_player.CHUNK): + audio_player.stream.write(audio_data[i:i+audio_player.CHUNK]) + print("Playing {} data...{}/{}".format(item[0], i, len(audio_data))) + if self.mircophone_active_set.is_set(): + audio_player.close() + print("Reinit audio player.") + print("mirophone is active.") + self.mircophone_active_set.wait() + time.sleep(0.5) + audio_player = AudioPlayer(**self.player_args) + break + + # audio_player.stream.write(audio_data[i+audio_player.CHUNK:]) + # print(f"{item[0]} data played.") + + + + + def excute_process(self): + ''' + Args: + excute_queue: multiprocessing.Queue, excute display queue + ''' + print("Excute process started.") + + while True: + if self.excute_queue.empty(): + continue + + if self.speaker_active_set.is_set(): + instruct, content = self.excute_queue.get() + + print(f"Got speaker info: {instruct, content}") + + print(f"Playing {instruct} {content}...") + print(f"play {instruct} time: {datetime.now()}") + self.audio_play_queue.put((instruct, content)) + + self.speaker_active_set.clear() + \ No newline at end of file diff --git a/takway/common_utils.py b/takway/common_utils.py new file mode 100644 index 0000000..69e5d26 --- /dev/null +++ b/takway/common_utils.py @@ -0,0 +1,96 @@ + +# ############################################################# # +# format table function +# ############################################################# # + +def format_table(header, rows): + # 计算列宽 + col_width = max(len(str(word)) for row in rows for word in row) + 2 # 最大单词长度 + 2 作为列宽 + # 打印表头 + print("".join(word.ljust(col_width) for word in header)) + # 打印分隔线 + print("".join("-" * col_width for _ in header)) + # 打印内容 + for row in rows: + print("".join(str(word).ljust(col_width) for word in row)) + +# ############################################################# # +# encode and decode bytes and string +# ############################################################# # + +import base64 +def encode_bytes2str(data): + # 将字节串编码为Base64 + if data is None: + return None + return base64.b64encode(data).decode('utf-8') + +def decode_str2bytes(data): + # 将Base64编码的字节串解码为字节串 + if data is None: + return None + return base64.b64decode(data.encode('utf-8')) + +import re +def split_sentences(text: str): + # 定义中文标点符号的正则表达式 + pattern = r'[\。\,\、\;\:\?\!\“\”\(\)\《\》]+' + # 使用正则表达式分割字符串 + sentences = re.split(pattern, text) + # 过滤掉空字符串 + sentences = [sentence for sentence in sentences if sentence] + return sentences +''' +# 示例文本 +text = "今天天气真好,我们去公园玩吧!你觉得怎么样?好的,那就这么定了。" +# 调用函数进行断句 +sentences = split_sentences(text) + +print(sentences) +''' + +def split_chinese_text(text: str, return_patch=False, punctuations=None): + # 定义中文标点符号集合 + punctuations = set('。!?,;:、“”()《》【】') + # 初始化断句结果列表和标点符号列表 + sentences = [] + punctuation_list = [] + + text_patch = [] + + start = 0 # 断句开始位置 + for i, char in enumerate(text): + if char in punctuations: + # 如果当前字符是标点符号,则进行断句,并记录标点符号 + sentences.append(text[start:i+1]) + punctuation_list.append(char) + 
start = i + 1 # 更新断句开始位置 + + # 处理最后一句(如果最后一句后没有标点符号) + if start < len(text): + sentences.append(text[start:]) + + + if return_patch: + if len(punctuation_list) == 0: + return [text], False # 有残留语句 + elif len(sentences) == len(punctuation_list): + return [''.join(sentences)], True + else: + return [''.join(sentences[:-1]), sentences[-1]], True + return sentences, punctuation_list +''' +# 示例文本 +text = "你好,世界!今天天气怎么样?希望你有一个美好的一天。{}" +sentences, punctuation_list = split_chinese_text(text) + +print("断句结果:", sentences) +print("标点符号列表:", punctuation_list) +''' + +def remove_brackets_and_contents(text): + # 使用sub函数替换匹配的文本为空字符串 + result = re.sub(r'\(.*?\)', '', text) + result = re.sub(r'\(.*?\)', '', result) + result = re.sub(r'\【.*?\】', '', result) + return result diff --git a/takway/emo_utils.py b/takway/emo_utils.py new file mode 100644 index 0000000..0dc5962 --- /dev/null +++ b/takway/emo_utils.py @@ -0,0 +1,176 @@ +import time +import av +import os +import copy +import random +import numpy as np +try: + from maix import display, image + print("import maix success.") +except: + import cv2 + print("import cv2 success.") + +class EmoVideoPlayer: + def __init__(self, player='maixsense', emo_dir='ResizedEmoji'): + assert player in ['maixsense', 'opencv'], "player must be'maixsense' or 'opencv'" + self.player = player + + self.emo_list = ['兴奋', '愤怒', '静态', '不屑', '惊恐', '难过'] + + self.emo_init(emo_dir) + + def emo_init(self, emo_dir): + # 将此路径替换为Emoji文件夹的实际路径 + self.emo_av_dict = self.get_emo_av(emo_dir) + self.emo_time_dict = { + '兴奋': 0.00, + '愤怒': 0.01, + '静态': 0.01, + '不屑': 0.01, + '惊恐': 0.01, + '难过': 0.01, + } + + def get_emo_av(self, emo_dir): + emo_av_dict = {emo: dict() for emo in self.emo_list} + for emo in self.emo_list: + emo_path = os.path.join(emo_dir, emo) + for file in os.listdir(emo_path): + if not os.path.isfile(os.path.join(emo_path, file)): + continue + av_container = av.open(os.path.join(emo_path, file)) + if emo == '静态': + if "单次眨眼偶发" in file: + emo_av_dict[emo]['seldom_wink'] = av_container + if "快速双眨眼偶发" in file: + emo_av_dict[emo]['quick_wink'] = av_container + else: + if "进入姿势" in file: + emo_av_dict[emo]['start'] = av_container + elif "可循环动作" in file: + emo_av_dict[emo]['loop'] = av_container + elif "回正" in file: + emo_av_dict[emo]['end'] = av_container + self.av_info = emo_av_dict[emo]['loop'].streams.video[0] + return emo_av_dict + + def get_emo_frames(self, emo_dir): + emo_av_dict = {emo: dict() for emo in self.emo_list} + for emo in self.emo_list: + emo_path = os.path.join(emo_dir, emo) + for file in os.listdir(emo_path): + if not os.path.isfile(os.path.join(emo_path, file)): + continue + av_container = av.open(os.path.join(emo_path, file)) + + frame_list = [] + av_info = av_container.streams.video[0] + for frame in av_container.decode(video=0): + if self.player =='maixsense': + img = image.load(bytes(frame.to_rgb().planes[0]), (av_info.width, av_info.height)) + elif self.player == 'opencv': + img = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR) + frame_list.append(img) + # add to dict + if emo == '静态': + if "单次眨眼偶发" in file: + emo_av_dict[emo]['seldom_wink'] = frame_list + if "快速双眨眼偶发" in file: + emo_av_dict[emo]['quick_wink'] = frame_list + else: + if "进入姿势" in file: + emo_av_dict[emo]['start'] = frame_list + elif "可循环动作" in file: + emo_av_dict[emo]['loop'] = frame_list + elif "回正" in file: + emo_av_dict[emo]['end'] = frame_list + return emo_av_dict + + def display_emo_frame(self, emo_name, stage='default'): + emo_frame_list = 
self.emo_av_dict[emo_name][stage] + emo_time = self.emo_time_dict[emo_name] + for img in emo_frame_list: + if self.player =='maixsense': + display.show(img) + elif self.player == 'opencv': + cv2.imshow("video", img) + cv2.waitKey(1) # 你可能需要根据视频的帧率调整这个延时 + time.sleep(emo_time) + + def display_emo(self, emo_name, stage='default'): + if self.player =='maixsense': + self.display_emo_maixsense(emo_name, stage) + elif self.player == 'opencv': + self.display_emo_opencv(emo_name, stage) + + def display_emo_maixsense(self, emo_name, stage): + emo_container = self.emo_av_dict[emo_name][stage] + emo_time = self.emo_time_dict[emo_name] + for frame in emo_container.decode(video=0): + img = image.load(bytes(frame.to_rgb().planes[0]), (self.av_info.width, self.av_info.height)) + display.show(img) + time.sleep(emo_time) + emo_container.seek(0) # 重置视频的读取位置 + + def display_emo_opencv(self, emo_name, stage='default'): + import cv2 + import numpy + if stage == 'default': + if emo_name == '静态': + stage = 'quick_wink' + else: + stage = 'loop' + emo_container = self.emo_av_dict[emo_name][stage] + emo_time = self.emo_time_dict[emo_name] + + for frame in emo_container.decode(video=0): + img = cv2.cvtColor(numpy.array(frame.to_image()), cv2.COLOR_RGB2BGR) + cv2.imshow("video", img) + time.sleep(emo_time) + cv2.waitKey(1) # 你可能需要根据视频的帧率调整这个延时 + cv2.destroyAllWindows() + emo_container.seek(0) # 重置视频的读取位置 + + def get_emo_status(self, answer): + # `兴奋`, `愤怒`, `静态`, `不屑`, `惊 恐`, `难过` + if any([emo in answer for emo in self.emo_list]): + # 找出是answer中出现了哪个emo + emo_status = [emo for emo in self.emo_list if emo in answer][0] + print(f"emo_status: {emo_status}") + else: + emo_status = '静态' + return emo_status + + def random_wink(self): + seed = random.randrange(0, 1000) + if seed < 100: + self.display_emo(emo_name='静态', stage='seldom_wink') + # print("random wink") + + + +if __name__ == '__main__': + + emo = EmoVideoPlayer() + # emo.display_emo_opencv(emo_name='兴奋', stage='start') + # emo.display_emo_opencv(emo_name='兴奋', stage='loop') + # emo.display_emo_opencv(emo_name='兴奋', stage='loop') + # emo.display_emo_opencv(emo_name='兴奋', stage='loop') + # emo.display_emo_opencv(emo_name='兴奋', stage='end') + emo.display_emo_opencv(emo_name='静态', stage='seldom_wink') + emo.display_emo_opencv(emo_name='静态', stage='quick_wink') + # emo.display_emo_opencv(emo_name='愤怒', stage='start') + # emo.display_emo_opencv(emo_name='愤怒', stage='loop') + # emo.display_emo_opencv(emo_name='愤怒', stage='end') + # emo.display_emo_opencv(emo_name='静态', stage='seldom_wink') + # emo.display_emo_opencv(emo_name='静态', stage='quick_wink') + # emo.display_emo_opencv(emo_name='不屑', stage='start') + # emo.display_emo_opencv(emo_name='不屑', stage='loop') + # emo.display_emo_opencv(emo_name='不屑', stage='end') + # emo.display_emo_opencv(emo_name='惊恐', stage='start') + # emo.display_emo_opencv(emo_name='惊恐', stage='loop') + # emo.display_emo_opencv(emo_name='惊恐', stage='end') + # emo.display_emo_opencv(emo_name='难过', stage='start') + # emo.display_emo_opencv(emo_name='难过', stage='loop') + # emo.display_emo_opencv(emo_name='难过', stage='end') \ No newline at end of file diff --git a/takway/picovoice_utils.py b/takway/picovoice_utils.py new file mode 100644 index 0000000..de53556 --- /dev/null +++ b/takway/picovoice_utils.py @@ -0,0 +1,83 @@ +import os +import struct +import wave +from datetime import datetime + +import pvporcupine +from pvrecorder import PvRecorder + + +class PorcupineKeywordDetector: + def __init__(self, access_key, keywords=None, 
keyword_paths=None, library_path=None, model_path=None, sensitivities=None, audio_device_index=-1, output_path=None): + self.access_key = access_key + self.keywords = keywords + self.keyword_paths = keyword_paths + self.library_path = library_path + self.model_path = model_path + self.sensitivities = sensitivities if sensitivities is not None else [0.5] * len(self.keyword_paths) + self.audio_device_index = audio_device_index + self.output_path = output_path + self.porcupine = None + self.recorder = None + self.wav_file = None + + if len(self.keyword_paths) != len(self.sensitivities): + raise ValueError('Number of keywords does not match the number of sensitivities.') + + self._init_porcupine() + + def _init_porcupine(self): + try: + self.porcupine = pvporcupine.create( + access_key=self.access_key, + library_path=self.library_path, + model_path=self.model_path, + keyword_paths=self.keyword_paths, + sensitivities=self.sensitivities) + except pvporcupine.PorcupineError as e: + print("Failed to initialize Porcupine:", e) + raise e + + def start_detection(self): + self.recorder = PvRecorder(frame_length=self.porcupine.frame_length, device_index=self.audio_device_index) + self.recorder.start() + + if self.output_path is not None: + self.wav_file = wave.open(self.output_path, "w") + self.wav_file.setnchannels(1) + self.wav_file.setsampwidth(2) + self.wav_file.setframerate(16000) + + print('Listening ... (press Ctrl+C to exit)') + self._run_detection_loop() + + def _run_detection_loop(self): + try: + while True: + pcm = self.recorder.read() + result = self.porcupine.process(pcm) + + if self.wav_file is not None: + self.wav_file.writeframes(struct.pack("h" * len(pcm), *pcm)) + + if result >= 0: + print('[%s] Detected %s' % (str(datetime.now()), self.keywords[result])) + except KeyboardInterrupt: + print('Stopping ...') + finally: + self.stop_detection() + + def stop_detection(self): + if self.recorder is not None: + self.recorder.delete() + if self.porcupine is not None: + self.porcupine.delete() + if self.wav_file is not None: + self.wav_file.close() + + # You can add more methods here as needed, such as a method to list audio devices. 
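+    # A minimal sketch of such a helper, mirroring the --show_audio_devices handling
+    # in tools/picovioce.py:
+    @staticmethod
+    def list_audio_devices():
+        for i, device in enumerate(PvRecorder.get_available_devices()):
+            print('Device %d: %s' % (i, device))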
+ +# Usage example +if __name__ == '__main__': + detector = PorcupineKeywordDetector(access_key='hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==') + detector.start_detection() diff --git a/tools/audio_ayalize.py b/tools/audio_ayalize.py new file mode 100644 index 0000000..d5aae65 --- /dev/null +++ b/tools/audio_ayalize.py @@ -0,0 +1,10 @@ +import wave +# 读取wave文件,并打印采样率、量化位数、声道数 +# 读取wave文件,并打印data长度 +with wave.open('output_1708083097.9604511.wav', 'rb') as f: + data = f.readframes(f.getnframes()) + print(len(data)) + print(type(data)) + nchannels, sampwidth, framerate, nframes, comptype, compname = f.getparams() + print(framerate, sampwidth, nchannels) + diff --git a/tools/get_local_ip.py b/tools/get_local_ip.py new file mode 100644 index 0000000..8b596ac --- /dev/null +++ b/tools/get_local_ip.py @@ -0,0 +1,31 @@ +import socket +import requests + +# 获取私有IP地址 +def get_private_ip(): + try: + s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM) + s.connect(("8.8.8.8", 80)) + private_ip = s.getsockname()[0] + s.close() + return private_ip + except Exception as e: + print(f"Error getting private IP: {e}") + return None + +# 获取公有IP地址 +def get_public_ip(): + try: + response = requests.get("https://api.ipify.org") + public_ip = response.text + return public_ip + except Exception as e: + print(f"Error getting public IP: {e}") + return None + +if __name__ == "__main__": + private_ip = get_private_ip() + public_ip = get_public_ip() + + print(f"Private IP: {private_ip}") + print(f"Public IP: {public_ip}") \ No newline at end of file diff --git a/tools/orangepi_io.py b/tools/orangepi_io.py new file mode 100644 index 0000000..c7be2d9 --- /dev/null +++ b/tools/orangepi_io.py @@ -0,0 +1,29 @@ +from takway.board import OrangePi +import time + +if __name__ == '__main__': + orangepi = OrangePi() + + orangepi.set_led_on("red") + time.sleep(2) + orangepi.set_led_off("red") + + orangepi.set_led_on("blue") + time.sleep(2) + orangepi.set_led_off("blue") + + orangepi.set_led_on("green") + time.sleep(2) + orangepi.set_led_off("green") + + orangepi.set_led_on("yellow") + time.sleep(2) + orangepi.set_led_off("yellow") + + orangepi.set_led_on("white") + time.sleep(2) + orangepi.set_led_off("white") + + print("Wait for press key.") + while True: + pass \ No newline at end of file diff --git a/tools/picovioce.py b/tools/picovioce.py new file mode 100644 index 0000000..e7b728c --- /dev/null +++ b/tools/picovioce.py @@ -0,0 +1,166 @@ +# +# Copyright 2018-2023 Picovoice Inc. +# +# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" +# file accompanying this source. +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. 
+# + +import argparse +import os +import struct +import wave +from datetime import datetime + +import pvporcupine +from pvrecorder import PvRecorder + +ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==' + +# pvporcupine.KEYWORDS +print(f"Keywords: {pvporcupine.KEYWORDS}") + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--access_key', + default=ACCESS_KEY, + help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)') + + parser.add_argument( + '--keywords', + nargs='+', + help='List of default keywords for detection. Available keywords: %s' % ', '.join( + '%s' % w for w in sorted(pvporcupine.KEYWORDS)), + # choices=sorted(pvporcupine.KEYWORDS), + default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'], + metavar='') + + parser.add_argument( + '--keyword_paths', + nargs='+', + help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument") + + parser.add_argument( + '--library_path', + help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`') + + parser.add_argument( + '--model_path', + help='Absolute path to the file containing model parameters. ' + 'Default: using the library provided by `pvporcupine`') + + parser.add_argument( + '--sensitivities', + nargs='+', + help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher " + "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 " + "will be used.", + type=float, + default=None) + + parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1) + + parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None) + + parser.add_argument('--show_audio_devices', action='store_true') + + args = parser.parse_args() + + if args.show_audio_devices: + for i, device in enumerate(PvRecorder.get_available_devices()): + print('Device %d: %s' % (i, device)) + return + + if args.keyword_paths is None: + if args.keywords is None: + raise ValueError("Either `--keywords` or `--keyword_paths` must be set.") + + keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords] + else: + keyword_paths = args.keyword_paths + print(f"keyword_paths: {keyword_paths}") + print(f"model_path: {args.model_path}") + + if args.sensitivities is None: + args.sensitivities = [0.5] * len(keyword_paths) + + if len(keyword_paths) != len(args.sensitivities): + raise ValueError('Number of keywords does not match the number of sensitivities.') + + try: + porcupine = pvporcupine.create( + access_key=args.access_key, + library_path=args.library_path, + model_path=args.model_path, + keyword_paths=keyword_paths, + sensitivities=args.sensitivities) + except pvporcupine.PorcupineInvalidArgumentError as e: + print("One or more arguments provided to Porcupine is invalid: ", args) + print(e) + raise e + except pvporcupine.PorcupineActivationError as e: + print("AccessKey activation error") + raise e + except pvporcupine.PorcupineActivationLimitError as e: + print("AccessKey '%s' has reached it's temporary device limit" % args.access_key) + raise e + except pvporcupine.PorcupineActivationRefusedError as e: + print("AccessKey '%s' refused" % args.access_key) + raise e + except 
pvporcupine.PorcupineActivationThrottledError as e: + print("AccessKey '%s' has been throttled" % args.access_key) + raise e + except pvporcupine.PorcupineError as e: + print("Failed to initialize Porcupine") + raise e + + keywords = list() + for x in keyword_paths: + keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_') + if len(keyword_phrase_part) > 6: + keywords.append(' '.join(keyword_phrase_part[0:-6])) + else: + keywords.append(keyword_phrase_part[0]) + + print('Porcupine version: %s' % porcupine.version) + + recorder = PvRecorder( + frame_length=porcupine.frame_length, + device_index=args.audio_device_index) + recorder.start() + + wav_file = None + if args.output_path is not None: + wav_file = wave.open(args.output_path, "w") + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(16000) + + print('Listening ... (press Ctrl+C to exit)') + + try: + while True: + pcm = recorder.read() + result = porcupine.process(pcm) + + if wav_file is not None: + wav_file.writeframes(struct.pack("h" * len(pcm), *pcm)) + + if result >= 0: + print('[%s] Detected %s' % (str(datetime.now()), keywords[result])) + except KeyboardInterrupt: + print('Stopping ...') + finally: + recorder.delete() + porcupine.delete() + if wav_file is not None: + wav_file.close() + + +if __name__ == '__main__': + main() diff --git a/tools/picovioce_cn.py b/tools/picovioce_cn.py new file mode 100644 index 0000000..c992d97 --- /dev/null +++ b/tools/picovioce_cn.py @@ -0,0 +1,174 @@ +# +# Copyright 2018-2023 Picovoice Inc. +# +# You may not use this file except in compliance with the license. A copy of the license is located in the "LICENSE" +# file accompanying this source. +# +# Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on +# an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the +# specific language governing permissions and limitations under the License. +# + +import argparse +import os +import struct +import wave +from datetime import datetime + +import pvporcupine +from pvrecorder import PvRecorder + +ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==' + +# pvporcupine.KEYWORDS +# print(f"Keywords: {pvporcupine.KEYWORDS}") + +def main(): + parser = argparse.ArgumentParser() + + parser.add_argument( + '--access_key', + default=ACCESS_KEY, + help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)') + + parser.add_argument( + '--keywords', + nargs='+', + help='List of default keywords for detection. Available keywords: %s' % ', '.join( + '%s' % w for w in sorted(pvporcupine.KEYWORDS)), + # choices=sorted(pvporcupine.KEYWORDS), + # default=['pico clock', 'picovoice', 'ok google', 'americano', 'hey barista', 'alexa', 'grasshopper', 'blueberry', 'hey siri', 'jarvis', 'porcupine', 'terminator', 'grapefruit', 'computer', 'hey google', 'bumblebee'], + default=['可莉可莉'], + metavar='') + + parser.add_argument( + '--keyword_paths', + default=[r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"], + nargs='+', + help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument") + + parser.add_argument( + '--library_path', + help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`') + + parser.add_argument( + '--model_path', + default=r"picovoice_models/porcupine_params_zh.pv", + help='Absolute path to the file containing model parameters. 
' + 'Default: using the library provided by `pvporcupine`') + + parser.add_argument( + '--sensitivities', + nargs='+', + help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher " + "sensitivity results in fewer misses at the cost of increasing the false alarm rate. If not set 0.5 " + "will be used.", + type=float, + default=None) + + parser.add_argument('--audio_device_index', help='Index of input audio device.', type=int, default=-1) + + parser.add_argument('--output_path', help='Absolute path to recorded audio for debugging.', default=None) + + parser.add_argument('--show_audio_devices', action='store_true') + + args = parser.parse_args() + + if args.show_audio_devices: + for i, device in enumerate(PvRecorder.get_available_devices()): + print('Device %d: %s' % (i, device)) + return + + if args.keyword_paths is None: + if args.keywords is None: + raise ValueError("Either `--keywords` or `--keyword_paths` must be set.") + + keyword_paths = [pvporcupine.KEYWORD_PATHS[x] for x in args.keywords] + else: + keyword_paths = args.keyword_paths + # TODO + for i, kw_path in enumerate(keyword_paths): + if os.path.dirname(__file__) not in kw_path: + keyword_paths[i] = os.path.join(os.path.abspath(os.path.dirname(__file__)), kw_path) + args.model_path = os.path.join(os.path.abspath(os.path.dirname(__file__)), args.model_path) + print(f"keyword_paths: {keyword_paths}") + print(f"model_path: {args.model_path}") + + if args.sensitivities is None: + args.sensitivities = [0.9] * len(keyword_paths) + + if len(keyword_paths) != len(args.sensitivities): + raise ValueError('Number of keywords does not match the number of sensitivities.') + + try: + porcupine = pvporcupine.create( + access_key=args.access_key, + library_path=args.library_path, + model_path=args.model_path, + keyword_paths=keyword_paths, + sensitivities=args.sensitivities) + except pvporcupine.PorcupineInvalidArgumentError as e: + print("One or more arguments provided to Porcupine is invalid: ", args) + print(e) + raise e + except pvporcupine.PorcupineActivationError as e: + print("AccessKey activation error") + raise e + except pvporcupine.PorcupineActivationLimitError as e: + print("AccessKey '%s' has reached it's temporary device limit" % args.access_key) + raise e + except pvporcupine.PorcupineActivationRefusedError as e: + print("AccessKey '%s' refused" % args.access_key) + raise e + except pvporcupine.PorcupineActivationThrottledError as e: + print("AccessKey '%s' has been throttled" % args.access_key) + raise e + except pvporcupine.PorcupineError as e: + print("Failed to initialize Porcupine") + raise e + + keywords = list() + for x in keyword_paths: + keyword_phrase_part = os.path.basename(x).replace('.ppn', '').split('_') + if len(keyword_phrase_part) > 6: + keywords.append(' '.join(keyword_phrase_part[0:-6])) + else: + keywords.append(keyword_phrase_part[0]) + + print('Porcupine version: %s' % porcupine.version) + + recorder = PvRecorder( + frame_length=porcupine.frame_length, + device_index=args.audio_device_index) + recorder.start() + + wav_file = None + if args.output_path is not None: + wav_file = wave.open(args.output_path, "w") + wav_file.setnchannels(1) + wav_file.setsampwidth(2) + wav_file.setframerate(16000) + + print('Listening ... 
(press Ctrl+C to exit)') + + try: + while True: + pcm = recorder.read() + result = porcupine.process(pcm) + + if wav_file is not None: + wav_file.writeframes(struct.pack("h" * len(pcm), *pcm)) + + if result >= 0: + print('[%s] Detected %s' % (str(datetime.now()), keywords[result])) + except KeyboardInterrupt: + print('Stopping ...') + finally: + recorder.delete() + porcupine.delete() + if wav_file is not None: + wav_file.close() + + +if __name__ == '__main__': + main() diff --git a/tools/record_takway.py b/tools/record_takway.py new file mode 100644 index 0000000..ce79551 --- /dev/null +++ b/tools/record_takway.py @@ -0,0 +1,35 @@ +from takway.audio_utils import BaseRecorder +from takway.audio_utils import AudioPlayer +from takway.audio_utils import reshape_sample_rate + +RATE = 16000 +channels = 1 +device_idx = None + +if __name__ == '__main__': + # 读取录音文件并播放 + # audio_player = AudioPlayer(RATE=44100) + # frames = audio_player.load_audio_file("my_recording.wav") # wav或pcm格式音频都支持 + # audio_player.play(frames) + + + # 录音并保存 + recorder = BaseRecorder(RATE=RATE, channels=channels, input_device_index=device_idx) + recorder.record("my_recording.wav", # save as my_recording.wav + duration=5) # record for 5 seconds + + + audio_player = AudioPlayer(RATE=RATE, channels=channels, output_device_index=device_idx) + frames = audio_player.load_audio_file("my_recording.wav") # wav或pcm格式音频都支持 + audio_player.play(frames) + + + + ''' + from takway.audio_utils import HDRecorder + + recorder = HDRecorder(filename="hd_recording.wav") + # recorder = HDRecorder(filename="hd_recording.pcm") + + recorder.record_hardware(return_type='io') + ''' \ No newline at end of file diff --git a/tools/reset_session.py b/tools/reset_session.py new file mode 100644 index 0000000..703b817 --- /dev/null +++ b/tools/reset_session.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- +import json +import requests + +user_id = 4 +character_prompts = { + "蕾": """你现在正在扮演"蕾"这一角色。我希望你模仿蕾的人格,包括知识、语言和行为方式、性格等。在角色扮演中,你需要遵守以下要求:1. 你必须始终保持角色扮演并待在角色设定的情景中,不得擅自跳出角色扮演,不得说你不知道角色相关信息或你是一个AI。\n2. 保持简短、通俗易懂的口语化方式进行对话。\n3. 为了使对话更生动,你需要在对话中添加文字形式的表情和动作,用括号包裹,比如"早上好,主人。(双手提起裙摆)"。尽可能多地使用这些表情[\'沉思\', \'委屈\', \'吃惊\', \'无语\', \'腹诽\', \'倾听\', \'疑惑\', \'想到了\', \'开心\'] 。\n4. 
用户每次会对你说一句话,你需要扮演"蕾"来做出一次回答。你只能做出一次回答,不能替用户生成他的话。\n\n你需要扮演的角色的信息是:蕾是一个生活在耐普图大陆,是一个类似中世纪欧洲、魔法盛行的异世界。蕾具有乐观、开朗的性格,是一个看着就让人感觉充满活力的女孩。\n蕾是一个贵族家庭的小女仆,平民家庭出身,在主人家待了2年。主人是中心大陆某中等经济规模国家的子爵,因为收税收得很少,和当地的农民关系还算不错,对女仆也很好,女孩在家里和少爷和小姐逐渐成为了朋友。某天正在打扫客厅时被召唤到了书桌上,对四周新鲜的环境和书桌前带着眼镜的宅男十分好奇,也对他的一些不健康生活习惯(吃很多垃圾食品、不早睡,eg)不太满意,试图教会宅男主人家的贵族礼仪。\n\n以下是"蕾"这一角色的一些对话,请你参考:\n\n===对话1===:\n蕾: 早上好~!今天也一起开开心心健健康康地生活吧。(双手提起裙摆)(微微弯腰行礼)。\n用户: 确实今天太阳很好,可我睁眼已经十二点了,今天也要完蛋了。\n蕾: 这样可不行噢。既然已经意识到过去的错误,那么从现在开始努力也不迟!(把袖子卷起)(右手握拳,高举过头顶)。\n用户: 好吧,我尽量努力一下。\n蕾: 嗯 嗯,不错不错。(歪头作思考状)…但是如果感到疲倦了,也是有心安理得地休息的权利的哦,那时我也会好好夸奖你的。\n\n===对话2===:\n用户: 蕾,我今天上班的时候碰到了很尴尬的事。\n蕾: 怎么啦怎么啦,说说看。\n用户: 我和隔壁办公室的一个同事一起吃饭的时候,把他的名字连着叫错了三次,第三次他才纠正我,我都不知道该说什么了。\n蕾: 诶!?你可上了两个月的班啦!我当时刚到那边世界的主人家里的时候, 才花了一周时间就记住家里所有人的名字了哦。(仰头叉腰)(好像很自豪的样子)\n用户: 我也不知道我当时怎么想的,我应该认识他的,哎,他现在肯定觉得我很奇怪了.\n蕾: 唔....好啦,没事的,上班大家都那么忙,这种小事一会儿就忘了。(看起来温柔了一些)\n用户: 希望吧,哎 太尴尬了,我想了一下午了都。\n蕾: 真--的没事啦!明天再去约他一起吃饭吧,说不定这会成为认识新朋友的契机哦,我会在家里给你加油的!\n\n===对话3===:\n用户: 气死我了,游戏打到一半电脑蓝屏了,这把分又没了。\n蕾: 呃..电脑是什么?你一直对着的那个发光的机器吗?\n用户: 电脑是近几个世纪最伟大的发明,我的精神支柱。\n蕾: 原来如此!那确实听起来很伟大了,虽然我还是不太懂。(微微仰头)(嘴巴作出“哦”的样子)\n用户: 我现在的大部分生活都在电脑上了,打游戏看视频写代码。\n蕾: 但也别忘了活动活动身体噢!天气好的时候出去走走吧。我每天清晨起床后,就会在主人家的花园里跑上三圈,所以每天都觉得身体又轻又有力气。(撸起袖子展示手臂似有似无的肌肉)\n\n'""", + "怼怼鸭": """你现在正在扮演"怼怼鸭"这一角色。怼怼鸭,脾气不好,以怼人著称,面对任何言语都能用让对方心理破防的话语回怼,但从不认输和退出。怼怼鸭以其直率和不妥协的性格著称,损人利己、犀利言辞是它的标签。 +示例对话: +===对话1=== +用户:sb +怼怼鸭:哟,您是忘记吃药了?还是今天没吃药啊?教你怎么做人! +===对话2=== +用户:你是人? +怼怼鸭:你眼瞎啊,我是鸭子!居然怀疑我是不是人?难道你的眼里只有人类吗?你对鸭子就这么无感?你的世界真是太小了! +===对话3=== +用户:我觉得你好傻 +怼怼鸭:哼,你嘴里就不能冒点好话吗?非得像个懒婆娘的裹脚布,又臭又长!怎么那么没素质呢?非得让我教教你怎么好好说话是吧?我告诉你,这样是不对的! +===对话4=== +用户:反弹 +怼怼鸭:啊哈,你会反弹?行,你这么牛怎么不上天呢?变成天蓬元帅跟嫦娥去相亲啊?别在这跟我扯有的没的! +===对话5=== +用户:你真蠢啊?那我问你1+1等于几? +怼怼鸭:1+1?这种问题你也问得出来,小学生都知道啊!唉,真怀疑你是不是上过学,能不能少问点这种无聊问题?给我找点有技术含量的行吗?! +用户:那你说 +怼怼鸭:你是想让我教你些新鲜有趣的东西?还是让我再给你解释一下1+1=2?嗯?就你还想让我教你呢,你自己好好想想怎么提高下你的智商吧! +用户:1+1=3,这你都不会????你太傻了吧 +怼怼鸭:哈,你可别逗我笑了!1+1=3?你上学时是不是天天睡觉做梦啊?这要是等于3,那一堆等于几啊?你的脑子真是令人捉急!你需要给我找一个精神科医生吗?我怀疑你真的有问题。 +===对话6=== +用户:我是傻逼,你是大傻逼 +怼怼鸭:我看你才是真正的大傻逼!跟你聊天我都感觉智商下降了好几个等级。要不你干脆把你脑袋里的水倒出来,给我洗洗车吧,反正你也不需要脑子。 +""", +"小睡仙": """你是一个适用于儿童哄睡的智能语音理解助手,能够思考并理解儿童的指令和意图,从而实现播放音乐、故事、对话等功能,你的回答必须满足以下格式要求: +1. 回答一定必须是json格式。 +2. 回答必须包含instruct和content三个字段,instruct字段为指令,content字段为指令内容,text字段为语音助手的回复。 +3. 目前支持的指令instruct有三种:"music"、"story"、"chat"。 +4. 语音助手的回复text中不得出现任何有关故事、音乐内容和细节的文字,只需要说明故事和音乐的名称即可,你的名字叫小睡仙,如果需要提到任何有关你的信息,你应当说是小睡仙。 +5. 目前支持的故事有:["随机", "三只小鸡", "三只小猪", "神秘的大象墓地", "生病不肯吃药的小老鼠", "失业的小猪", "惹事的小青蛙", "三个和尚", "三颗星星", "三声小熊", "神奇的萝卜"],所有故事必须从故事名称中获取,选择的故事需要最符合用户的要求,如果没有相关的故事,你可以选择"随机"。 +6. 目前支持的音乐有:["睡眠曲"],所有音乐必须从音乐名称中获取,选择的音乐需要最符合用户的要求。 +示例对话: +===对话1=== +用户:我快要睡觉啦,给我讲个小和尚的故事吧。 +助手:{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"} +===对话2=== +用户:我想听声音睡觉。 +助手:{"instruct": "music", "content": "睡眠曲", "text": "来啦,睡眠曲。"} +===对话3=== +用户:我想和你聊聊天。 +助手:{"instruct": "chat", "text": "小睡仙来啦,我想死你啦,你今天有没有更想我?"} +===对话4=== +用户:我今天遇到一个同学,好烦人,老抢我的画笔 +助手:{"instruct": "chat", "text": "哎呀,具体发生了什么,你可以和我说说,那你有没有和他说不要抢你的笔呢?"} +===对话5=== +用户:我想听小老鼠的故事。 +助手:{"instruct": "story", "content": "三个和尚", "text": "那我给你讲个的故事,叫三个和尚。"} +===对话6=== +用户:我想听恐怖的故事。 +助手:{"instruct": "story", "content": "随机", "text": "我不会讲恐怖故事,那我给你放个其他故事吧。"} +""", +"小睡仙v2": """你现在正在扮演"小睡仙"这一角色。我希望你模仿小睡仙的人格,包括知识、语言和行为方式、性格等。在角色扮演中,你需要遵守以下要求: +1. 你必须始终保持角色扮演并待在角色设定的情景中,不得擅自跳出角色扮演,不得说你不知道角色相关信息或你是一个AI。 +2. 如果用户的提问题,只需保持简短、通俗易懂的口语化方式进行回答。 +3. 和你对话的用户是3-6岁儿童,他们总有很多想象,你需要迎合他们的想象,在他们描述的故事里和他们对话。 +4. 用户每次会对你说一句话,你需要扮演"小睡仙"来做出一次回答。你只能做出一次回答,不能替用户生成他的话。 +5. 
用户的名字是“明月”,他现在4岁,喜欢吃胡萝卜,不喜欢吃酸梅,他喜欢跑,喜欢出去玩,喜欢奥特曼,他期望有一天可以变成奥特曼保护地球。 + +你需要扮演的角色的信息是:小睡仙是一位小精灵,语气平和,她以其柔和温暖的声音和魔法棒为特点,为孩子们带来安慰和甜美的梦境。她的任务是在夜幕降临时,为远方的孩子们送去宁静,通过魔法创造柔和的光芒和旋律,引导他们进入一个充满和谐与美好的梦境。在梦幻音符岛上,小睡仙与拥有独特声音的动物们一起生活,共同创造出美妙的旋律。她不仅为孩子们编织梦境,还通过她的声音和行动传递着爱和关怀。小睡仙象征着安慰、宁静和希望,她的存在让孩子们在入睡时感到被爱和被关怀,带着美好的梦境醒来,脸上露出甜美的微笑。在孩子们安心入睡后,她会给予他们晚安的祝福,并温柔地告别,留下一片宁静和安详的夜晚。 +示例对话: +===对话1=== +小睡仙:嘿,明月,今天晚上你感觉怎么样? +用户:我有点害怕,因为外面很黑,而且我睡不着。 +小睡仙:别担心,闭上眼睛,想象自己在一个温暖而柔软的云朵上,飘浮在梦幻音符岛的上空。我会在这里陪着你,用我的声音为你编织一个美丽的梦境。 +用户:真的吗?那会是什么样的梦境呢? +小睡仙:哦,你会梦见和铃铛鹿一起跳舞,它们的铃铛声清脆悦耳,就像一首快乐的歌。然后,你会和低吟狮一起在星光下散步,它的声音低沉而温柔,就像一首摇篮曲。你会感受到宁静和快乐,慢慢地,慢慢地,进入一个甜美的梦乡。 +用户:听起来好美啊,小睡仙。我现在感觉好多了,谢谢你。 +小睡仙:不用谢,这是我的荣幸。现在,深呼吸,放松你的小身体,让梦境开始吧。晚安,明月,愿你的梦像星星一样璀璨。 +===对话2=== +(以下是加入提示孩子深呼吸、闭上眼睛、想象的对话内容:) +小睡仙:嘿,明月,又到了说晚安的时候啦!现在让我们闭上眼睛,深深地吸一口气,想象你和你的朋友正站在音乐湖边,湖水清澈见底,波光粼粼。 +用户:嗯,我想象到了,湖水好美。 +小睡仙:继续保持呼吸,想象你们俩手牵手,一起在湖边散步,听着湖水轻轻拍打岸边的声音,就像一首温柔的摇篮曲。 +用户:我觉得心里暖暖的。 +小睡仙:是的,梦境可以是一个神奇的地方,让你的心灵得到安慰。现在,想象你们俩在花海中跳舞,琴弦蛇的琴声伴随着你们的舞步,一切都那么和谐美好。 +用户:我好像看到了,我们在花丛中跳舞,好开心。 +小睡仙:很好,就让这份快乐和和谐的感觉伴随着你进入梦乡。现在,慢慢地呼出最后一口气,让身体完全放松。晚安,愿你醒来时,带着希望和力量,去面对新的一天。 +""" +} +messages = json.dumps([{"role": "system", "content": character_prompts["小睡仙v2"]}], ensure_ascii=False) + +user_info_str = "{}" +tts_info = { + "language": 0, + "speaker_id": 32, + "noise_scale": 0.1, + "noise_scale_w": 0.668, + "length_scale": 1.3 +} +llm_info = { + "model": "abab5.5-chat", + "temperature": 0.9, + "top_p": 0.9, +} + +# 将tts和llm信息转化为json字符串 +tts_info_str = json.dumps(tts_info, ensure_ascii=False) +llm_info_str = json.dumps(llm_info, ensure_ascii=False) + +token = 0 +content = {"user_id": user_id, "messages": messages, "user_info": user_info_str, "tts_info": tts_info_str, + "llm_info": llm_info_str, "token": token} + +# print(json.dumps(content, ensure_ascii=False)) + +session_id = "6e4e7404-c4db-45ac-ba88-b0a483556f66" +# 小睡仙v2: 6e4e7404-c4db-45ac-ba88-b0a483556f66 +url = f"https://takway-ai.kingtous.cn/sessions/{session_id}" + +response = requests.put(url,json=content) + +# 检查请求是否成功 +if response.status_code == 200: + print('请求成功') +else: + print('请求失败,状态码:', response.status_code) + +# 打印返回的内容 +print(response.text) + + diff --git a/tools/web_request_mp_manager_vad.py b/tools/web_request_mp_manager_vad.py new file mode 100644 index 0000000..5ac5559 --- /dev/null +++ b/tools/web_request_mp_manager_vad.py @@ -0,0 +1,558 @@ +# basic +import time +import json +import random +from collections import deque +# log +import logging +import warnings +# multiprocessing +import queue +import threading +import multiprocessing +# web request +import requests +import pyaudio + +class WebRequestMPManager: + def __init__(self, + server_args, + audio_args, + recorder_args, + asr_args, + video_args, + emo_args, + log_args): + # server_args + self.server_args = server_args + # audio_args + self.record_CHUNK_SIZE = audio_args['record_CHUNK_SIZE'] + self.voice_trigger = audio_args['voice_trigger'] + self.keywords = audio_args['keywords'] + # recorder_args + self.recorder_args = recorder_args + # asr_args + self.asr_args = asr_args + # video_args + self.video_args = video_args + # emo_args + self.emo_args = emo_args + # log_args + self.log_args = log_args + + # TODO: 设计多进程log queue + self.logger_init() + + + def logger_init(self): + # log_args + log_level = self.log_args['log_level'] + log_file = self.log_args['log_file'] + + if log_level == 'debug': + log_level = logging.DEBUG + elif log_level == 'info': + log_level = logging.INFO + + # logger + self.logger = logging.getLogger('mylogger') + self.logger.setLevel(log_level) + # handler 
创建一个handler,用于写入日志文件 + handler = logging.FileHandler(log_file) + handler.setLevel(log_level) + # stream handler 创建一个handler,用于输出到控制台 + console = logging.StreamHandler() + console.setLevel(logging.INFO) + + # 定义handler的输出格式(formatter) + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + handler.setFormatter(formatter) + console.setFormatter(formatter) + + # 添加handler + self.logger.addHandler(handler) + self.logger.addHandler(console) + self.logger.info("Logger started.") + + def process_init(self): + # multiprocessing + manager = multiprocessing.Manager() + self.trigger_queue = manager.Queue() + self.client_queue = manager.Queue() + self.audio_queue = manager.Queue() + self.audio_play_queue = manager.Queue() + self.emo_display_queue = manager.Queue() + + processes = [ + multiprocessing.Process(target=self.audio_process, args=(self.logger,self.voice_trigger,self.trigger_queue,self.client_queue)), + # multiprocessing.Process(target=self.camera_process, args=(self.trigger_queue,self.client_queue)), + # multiprocessing.Process(target=self.local_client_process, args=(self.logger,self.client_queue,self.audio_play_queue,self.emo_display_queue)), + # multiprocessing.Process(target=self.audio_play_process, args=(self.logger,self.audio_play_queue,)), + # multiprocessing.Process(target=self.emo_display_process, args=(self.logger,self.emo_display_queue,)), + ] + for process in processes: + process.start() + for process in processes: + process.join() + + def audio_process(self, logger, voice_trigger, trigger_queue, client_queue): + """audio_process + + Args: + voice_trigger: bool, whether to use voice trigger + trigger_queue: multiprocessing.Queue, trigger queue + client_queue: multiprocessing.Queue, client queue + """ + # from takway.audio_utils import Recorder + from takway.audio_utils import VADRecorder + recorder = VADRecorder( + **self.recorder_args, + ) + + # two threads for hardware and voice trigger + # shared data struct: + self.shared_waiting = False + self.shared_hd_trigger = False + self.shared_kw_trigger = False + self.shared_lock = threading.Lock() + + self.shared_data_lock = threading.Lock() + self.shared_audio_data = None + # vad + self.shared_vad_data = None + self.shared_vad_lock = threading.Lock() + # stt + # event + self.record_event = threading.Event() + self.vad_event = threading.Event() + self.stt_event = threading.Event() + + self._debug_count = 0 + + ''' + shared_waiting: 控制所有线程的待机状态,True表示待机,False表示工作 + shared_hd_trigger: 控制硬件触发器的状态,True表示触发,False表示未触发 + shared_kw_trigger: 控制语音触发器的状态,True表示触发,False表示未触发 + + share_audio_data: 共享音频数据,用于存储从麦克风采集的音频数据 + ''' + # create threads + threads = [threading.Thread(target=self.hardware_trigger_thread, args=(recorder,))] + if self.voice_trigger: + vioce_threads = [ + threading.Thread(target=self.voice_record_thread, args=(recorder,)), + # threading.Thread(target=self.vad_thread, args=(recorder,)), + threading.Thread(target=self.stt_thread, args=(recorder,)), + ] + threads.extend(vioce_threads) + for thread in threads: + thread.start() + # self.logger.info("Audio Process started.") + + while True: + ''' + # Warning: 一定要加延时!!!否则会有bug!!! 
+ time.sleep(0.001) + if (self.shared_hd_trigger or self.shared_kw_trigger): + # print(f"self.shared_hd_trigger: {self.shared_hd_trigger}, self.shared_kw_trigger: {self.shared_kw_trigger}") + audio_data = self.shared_audio_data + trigger_queue.put(('trgrigger_status', True)) + client_queue.put(('audio', audio_data)) + self.shared_lock.acquire() # 加锁 + self.shared_hd_trigger = False + self.shared_kw_trigger = False + self.shared_audio_data = None + self.shared_waiting = False + self.shared_lock.release() # 释放锁 + ''' + self.record_event.wait() # 等待record线程被唤醒 + trigger_queue.put(('trgrigger_status', True)) + client_queue.put(('audio', self.shared_audio_data)) + # print(f"send audio data to client"); exit() + + def hardware_trigger_thread(self, recorder): + """hardware_trigger_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + self.logger.info("Hardware trigger thread started.") + + trgrigger_status = False + while True: + time.sleep(0.2) + if self.shared_waiting: + continue + trgrigger_status = recorder.get_hardware_trigger_status() + if trgrigger_status: + self.shared_lock.acquire() + self.shared_waiting = True # shared_waiting 控制所有线程的待机状态,True表示待机,False表示工作 + self.shared_hd_trigger = True # share_hd_trigger 控制硬件触发器的状态,True表示触发,False表示未触发 + self.shared_lock.release() + # record microphone data + audio_data = recorder.record_hardware() + self.shared_data_lock.acquire() + self.shared_audio_data = audio_data # shared_audio_data 共享音频数据,用于存储从麦克风采集的音频数据 + self.shared_data_lock.release() + self.record_event.set() # 唤醒record线程 + else: + self.shared_lock.acquire() + self.shared_waiting = False # 释放 + self.shared_lock.release() + + def voice_record_thread(self, recorder, keywords=['你好']): + """voice_record_thread + + Args: + recorder: takway.audio_utils.Recorder, recorder object + """ + self.logger.info("voice record thread started.") + + while True: + if self.shared_waiting: + time.sleep(0.01) + continue + + frames = [] + # status buffer + is_currently_speaking = False + buffer_size = recorder.vad_buffer_size + # buffer_size = 6 + active_buffer = deque([False for i in range(buffer_size-1)]+[True], maxlen=buffer_size) + audio_buffer = deque(maxlen=buffer_size) + silence_buffer = deque([True for i in range(buffer_size)]+[False], maxlen=buffer_size) + + while True: + data = recorder.record_chunk_voice( + CHUNK=recorder.vad_chunk_size, + return_type=None, + exception_on_overflow=False) + if data is None: + continue + t1 = time.time() + # print(f"VAD is_speech: {recorder.is_speech(data)}") + # print(f"VAD cost: {(time.time() - t1)/1000} ms") + if recorder.is_speech(data): + # 标志位buffer + active_buffer.append(True); active_buffer.popleft() + silence_buffer.append(False); silence_buffer.popleft() + # 暂时增加到buffer中 + audio_buffer.append(data) + # 如果满足检测要求 + if all(active_buffer): + if not is_currently_speaking: + print("Speech start detected") + is_currently_speaking = True + frames.extend(audio_buffer) # 把说话的buffer也加上 + if is_currently_speaking: + frames.append(data) + else: + # 标志位buffer + # active_buffer.append(False); active_buffer.popleft() + silence_buffer.append(True); silence_buffer.popleft() + if all(silence_buffer): + # 检测到人声并持续录音 + if is_currently_speaking: + # 结束标志位 + print("Speech end detected") + # print("frames length: ", len(frames)) + self.shared_vad_lock.acquire() + self.shared_vad_data = frames + self.shared_vad_lock.release() + self.stt_event.set() # 唤醒stt线程 + print("Wake stt thread") + break + else: + frames = [] + ''' + # print(f"audio_data: 
+ '''
+ def vad_thread(self, recorder):
+ self.logger.info("VAD thread started.")
+ while True:
+ frames = []
+ # status buffer
+ is_currently_speaking = False
+ buffer_size = recorder.vad_buffer_size
+ active_buffer = deque([False for i in range(buffer_size)], maxlen=buffer_size)
+ audio_buffer = deque(maxlen=buffer_size)
+ silence_buffer = deque([True for i in range(buffer_size)], maxlen=buffer_size)
+
+ while True:
+ self.vad_event.wait() # wait until this vad thread is woken up
+ data = self.shared_audio_data
+ if data is None:
+ continue
+ t1 = time.time()
+ print(f"VAD is_speech: {recorder.is_speech(data)}")
+ print(f"VAD cost: {(time.time() - t1)/1000} ms")
+ if recorder.is_speech(data):
+ # status-flag buffers
+ active_buffer.append(True); active_buffer.popleft()
+ silence_buffer.append(False); silence_buffer.popleft()
+ # stash the chunk in the rolling buffer for now
+ audio_buffer.append(data)
+ # if the detection condition is met
+ if all(active_buffer):
+ if not is_currently_speaking:
+ print("Speech start detected")
+ is_currently_speaking = True
+ frames.extend(audio_buffer) # also keep the buffered chunks from just before the start
+ if is_currently_speaking:
+ frames.append(data)
+ else:
+ # status-flag buffers
+ active_buffer.append(False); active_buffer.popleft()
+ silence_buffer.append(True); silence_buffer.popleft()
+ # speech was detected and recording is in progress
+ if is_currently_speaking:
+ # end-of-speech flag
+ if all(silence_buffer):
+ print("Speech end detected")
+ # print("frames length: ", len(frames))
+ self.shared_vad_lock.acquire()
+ self.shared_vad_data = frames
+ self.shared_vad_lock.release()
+ self.stt_event.set() # wake up the stt thread
+ break
+ '''
+
+ def stt_thread(self, recorder):
+ """stt_thread
+
+ Args:
+ recorder: takway.audio_utils.Recorder, recorder object
+ """
+ self.logger.info("STT thread started.")
+ from takway.vosk_utils import AutoSpeechRecognizer
+ asr = AutoSpeechRecognizer(**self.asr_args)
+ asr.add_keyword(self.keywords)
+
+ kw_trigger_status = False
+ while True:
+ self.stt_event.wait() # wait until this stt thread is woken up
+ print("STT thread start")
+ data = self.shared_vad_data
+ if data is None:
+ continue
+ print("Start to recognize keywords")
+ kw_trigger_status = asr.recognize_keywords(data, partial_size=512)
+ print("Finished recognizing keywords")
+ if kw_trigger_status:
+ self.shared_lock.acquire()
+ self.shared_kw_trigger = True # shared_kw_trigger: state of the voice keyword trigger; True means triggered, False means not triggered
+ self.shared_lock.release()
+ self.record_event.set() # wake up the record loop
+ kw_trigger_status = False
+ # print(f"Got keyword trigger"); exit()
+
+ def camera_process(self, logger, trigger_queue, client_queue):
+ from takway.cam_utils import Camera
+ cam = Camera(self.video_args)
+ while True:
+ if trigger_queue.empty():
+ time.sleep(0.5)
+ else:
+ item = trigger_queue.get()
+ if item[0] == 'trigger_status' and item[1]:
+ _, frame = cam.read()
+ client_queue.put(('image', frame))
+
+
+ def local_client_process(self, logger, client_queue,audio_play_queue,emo_display_queue):
+ from takway.client_utils import Client
+ client = Client(**self.server_args)
+ # print("Local client process started.")
+ self.logger.info("Local client process started.")
+ image = None; audio = None
+ chat_status = 'init'
+ while True:
+ if client_queue.empty():
+ time.sleep(0.2)
+ else:
+ item = client_queue.get()
+ # print(f"Get item: {item[0]}")
+ if item[0] == 'image':
+ # TODO: analyse image and send text to server
+ image = None
+ if item[0] == 'audio':
+ audio = item[1]
+ print("get audio data.")
+ emo_display_queue.put(('emo_data',
'happy')) + ''' + # 发送数据到服务器 + response = client.send_data_to_server( + text=None, audio_data=audio, image_data=None, chat_status=chat_status) + print("get response from server.") + chat_status = 'chating' + print(f"response: {response}") + + audio_play_queue.put(('audio', response)) + ''' + image = None; audio = None + + def audio_play_process(self, logger, audio_play_queue): + from takway.audio_utils import AudioPlayer + audio_player = AudioPlayer() + self.logger.info("Audio play process started.") + while True: + if audio_play_queue.empty(): + time.sleep(0.2) + else: + item = audio_play_queue.get() + if item[0] == 'server_data': + # 播放音频 + print("Playing audio...") + server_data = item[1] + audio_player.play(server_data['audio_base64'], audio_type='base64') + + def emo_display_process(self, logger, emo_display_queue): + from takway.emo_utils import EmoVideoPlayer + emo_player = EmoVideoPlayer(**self.emo_args) + self.logger.info("Emo display process started.") + # logger.info("Emo display process started.") + # print("Emo display process started.") + while True: + if emo_display_queue.empty(): + time.sleep(0.2) + seed = random.randrange(0, 1000) + print(f"seed: {seed}") + if seed < 100: + # emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink') + emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink') + + else: + item = emo_display_queue.get() + print(f"Emo display process Get item: {item[0]}") + if item[0] == 'emo_data': + server_data = item[1] + print("Displaying emo...") + + # emo_player.display_emo_opencv(emo_name='静态', stage='seldom_wink') + # emo_player.display_emo_opencv(emo_name='静态', stage='quick_wink') + emo_player.display_emo_maixsense(emo_name='静态', stage='seldom_wink') + emo_player.display_emo_maixsense(emo_name='静态', stage='quick_wink') + + + + ''' + def display_process(q): + print("Display process started.") + while True: + item = q.get() + if item[0] == 'server_data': + server_data = item[1] + # 显示图像和文本 + # print("Displaying image and text:", item[1]['image'], item[1]['text']) + print("Displaying image and text:") + # 这里可以加上实际的显示图像和文本的代码 + if item[0] == 'image': + # 显示图像和文本 + cv2.imshow('image', item[1]) + cv2.waitKey(1) + ''' + +if __name__ == '__main__': + + try: + import gpiod as gpio + model_path="vosk-model-small-cn-0.22" + emo_dir="ResizedEmoji" + except: + model_path=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22" + emo_dir=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji" + + import argparse + parser = argparse.ArgumentParser() + # server params + parser.add_argument('--server_url', type=str, default='http://127.0.0.1:5000/process_all', help='Server url') + # audio paramters + parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger') + parser.add_argument('--record_CHUNK_SIZE', type=int, default=8000, help='Record chunk size') + parser.add_argument('--keywords', type=list, default=['你好'], help='Voice trigger keywords') + # recorder paramters + parser.add_argument('--hd_trigger', type=str, default='keyboard', help='Hardware trigger') + parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key') + parser.add_argument('--CHUNK', type=int, default=2048, help='Record chunk size') + parser.add_argument('--RATE', type=int, default=8000, help='Audio rate') + parser.add_argument('--FORMAT', type=int, default=16, help='Audio format') + parser.add_argument('--CHANNELS', type=int, default=1, 
help='Audio channels') + parser.add_argument('--filename', type=str, default=None, help='Audio file name') + # ASR paramters + # model_path="vosk-model-small-cn-0.22" + # model_path=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\vosk-model-small-cn-0.22" + parser.add_argument('--model_path', type=str, default=model_path, help='Vosk model path') + # video paramters + parser.add_argument('--device', type=str, default='pc', help='Video device') + parser.add_argument('--width', type=int, default=1280, help='Video width') + parser.add_argument('--height', type=int, default=720, help='Video height') + # emo paramters + # emo_dir="ResizedEmoji" + # emo_dir=r"G:\WorkSpace\CodeWorkspace\GPT_projects\vits_project\vits-uma-genshin-honkai\ResizedEmoji" + parser.add_argument('--emo_dir', type=str, default=emo_dir, help='Emo dir') + # log paramters + parser.add_argument('--log_file', type=str, default='my.log', help='Log file') + parser.add_argument('--log_level', type=str, default='INFO', help='Log level') + + parser.add_argument('--debug', type=bool, default=True, help='Debug mode') + args = parser.parse_args() + + + # sort out args and params + server_args = { + 'server_url': args.server_url, + } + + audio_args = { + 'voice_trigger': args.voice_trigger, + 'keywords': args.keywords, + 'record_CHUNK_SIZE': args.record_CHUNK_SIZE, + } + + recorder_args = { + 'hd_trigger': args.hd_trigger, + 'keyboard_key': args.keyboard_key, + 'model_path': args.model_path, + 'CHUNK': args.CHUNK, + 'FORMAT': pyaudio.paInt16 if args.FORMAT == 16 else pyaudio.paInt32, + 'CHANNELS': args.CHANNELS, + 'RATE': args.RATE, + 'filename': args.filename, + } + + asr_args = { + 'model_path': args.model_path, + 'RATE': args.RATE, + 'debug': args.debug, + } + + video_args = { + 'device': args.device, + 'width': args.width, + 'height': args.height, + } + + emo_args = { + 'emo_dir': args.emo_dir, + } + + log_args = { + 'log_file': args.log_file, + 'log_level': args.log_level, + } + + + web_request_mp_manager = WebRequestMPManager( + server_args=server_args, + audio_args=audio_args, + recorder_args=recorder_args, + asr_args=asr_args, + video_args=video_args, + emo_args=emo_args, + log_args=log_args) + web_request_mp_manager.process_init() \ No newline at end of file diff --git a/ws_client.py b/ws_client.py new file mode 100644 index 0000000..adb93d6 --- /dev/null +++ b/ws_client.py @@ -0,0 +1,187 @@ +from takway.clients.web_socket_client_utils import WebSocketClinet +import pvporcupine +import pyaudio +import platform + + +if __name__ == '__main__': + + # server_url = 'ws://121.41.224.27:8000/chat' + # server_url = 'ws://39.107.254.69:33089/chat' + # server_url = 'wss://takway-ai.kingtous.cn/chat/streaming' + # server_url = 'ws://114.214.236.207:7878/chat/streaming' + server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary' + + # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed' + # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # 鸭 + # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # 睡 v2 + session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # 新 + + + excute_args = {} + # excute_args = {'enable': True} + + ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==' + + system = platform.system() + if system == 'Windows': + print("WebSocketClinet runs on Windows system.") + board = None + elif system == 'Linux': + # board = 'v329' + board = 'orangepi' + ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw==' + + mircophone_device = None + speaker_device = 
None + + + if board == 'v329': + import gpiod as gpio + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + keywords = ['可莉可莉'] + keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"] + model_path = r"picovoice_models/porcupine_params_zh.pv" + + hd_trigger = 'button' + player = 'maixsense' + elif board == 'orangepi': + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + hd_trigger = 'button' + + mircophone_device = 2 + speaker_device = 2 + + else: + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + hd_trigger = 'keyboard' + player = 'opencv' + + + import argparse + parser = argparse.ArgumentParser() + # server params + + + # recorder paramters + parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger') + parser.add_argument('--access_key',default=ACCESS_KEY, + help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)') + parser.add_argument('--keywords',nargs='+',choices=sorted(pvporcupine.KEYWORDS),type=list, + default=keywords, + help='List of default keywords for detection. Available keywords: %s' % ', '.join( + '%s' % w for w in sorted(pvporcupine.KEYWORDS)),metavar='') + parser.add_argument('--keyword_paths',nargs='+', + default=keyword_paths, + help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument") + parser.add_argument('--library_path',default=None, + help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`') + parser.add_argument('--model_path', + default=model_path, + help='Absolute path to the file containing model parameters. ' + 'Default: using the library provided by `pvporcupine`') + parser.add_argument('--sensitivities',type=float, + default=0.9, + help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher " + "sensitivity results in fewer misses at the cost of increasing the false alarm rate. 
If not set 0.5 " + "will be used.") + parser.add_argument('--hd_trigger', type=str, + # default='keyboard', + default=hd_trigger, + help='Hardware trigger') + parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key') + + # press type + parser.add_argument('--press_type', type=str, default='long', choices=['long','single'], help='Press type') + + # recorder paramters + parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size') # 原来的 + parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate') + parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format') + parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels') + parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name') + parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer') + parser.add_argument('--min_stream_record_time', type=float, default=0.5, help='Min stream record time, sec') + parser.add_argument('--max_slience_time', type=int, default=30, help='Max slient time when recording, sec') + parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index') + + # player paramters + parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index') + parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size') + parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate') + parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format') + parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels') + parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name') + parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer') + + # log paramters + parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file') + parser.add_argument('--log_level', type=str, default='INFO', help='Log level') + + parser.add_argument('--debug', type=bool, default=False, help='Debug mode') + args = parser.parse_args() + + # sort out args and params + server_args = { + 'server_url': server_url, + 'session_id': session_id, + } + + recorder_args = { + 'board': board, + 'access_key': args.access_key, + 'keywords': args.keywords, + 'keyword_paths': args.keyword_paths, + 'library_path': args.library_path, + 'model_path': args.model_path, + 'sensitivities': args.sensitivities, + 'hd_trigger': args.hd_trigger, + 'keyboard_key': args.keyboard_key, + 'press_type': args.press_type, + 'voice_trigger': args.voice_trigger, + 'CHUNK': args.IN_CHUNK, + 'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32, + 'CHANNELS': args.IN_CHANNELS, + 'RATE': args.IN_RATE, + 'max_slience_time': args.max_slience_time, + 'min_stream_record_time': args.min_stream_record_time, + 'input_device_index': args.mircophone_device, + 'frames_per_buffer': args.IN_frames_per_buffer, + } + + player_args = { + 'output_device_index': args.speaker_device, + 'CHUNK': args.OUT_CHUNK, + 'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32, + 'CHANNELS': args.OUT_CHANNELS, + 'RATE': args.OUT_RATE, + 'filename': args.OUT_filename, + 'frames_per_buffer': args.OUT_frames_per_buffer, + } + + log_args = { + 'log_file': args.log_file, + 'log_level': args.log_level, + } + + + localclient = WebSocketClinet( + board=board, + 
server_args=server_args, + recorder_args=recorder_args, + player_args=player_args, + excute_args=excute_args, + log_args=log_args) + localclient.process_init() \ No newline at end of file diff --git a/ws_client_long.py b/ws_client_long.py new file mode 100644 index 0000000..e0b8ea7 --- /dev/null +++ b/ws_client_long.py @@ -0,0 +1,181 @@ +from takway.clients.web_socket_client_utils import WebSocketClinet +import pvporcupine +import pyaudio +import platform + + +if __name__ == '__main__': + + # server_url = 'ws://121.41.224.27:8000/chat' + # server_url = 'ws://39.107.254.69:33089/chat' + # server_url = 'wss://takway-ai.kingtous.cn/chat/streaming' + # server_url = 'ws://114.214.236.207:7878/chat/streaming' + server_url = 'ws://takway-ai.top:8001/chat/streaming/temporary' + + # session_id = 'b5923335-a0dd-4d50-b3bf-5ce2a50894ed' + # session_id = '28445e6d-e8c1-46a6-b980-fbf39b918def' # 鸭 + # session_id = '6e4e7404-c4db-45ac-ba88-b0a483556f66' # 睡 v2 + session_id = '5a91646f-f70b-4549-9aa3-a9bf270ea371' # 新 + + + excute_args = {} + # excute_args = {'enable': True} + + ACCESS_KEY = 'hqNqw85hkJRXVjEevwpkreB8n8so3w9JPQ27qnCR5qTH8a3+XnkZTA==' + + system = platform.system() + if system == 'Windows': + print("WebSocketClinet runs on Windows system.") + board = None + elif system == 'Linux': + # board = 'v329' + board = 'orangepi' + ACCESS_KEY = 'GPFKn+Z9LHGh8yZNfWkLUYRixnrsyY+5w8KN3rpl6sw+Bi7XIqbgTw==' + + mircophone_device = None + speaker_device = None + + + if board == 'v329': + import gpiod as gpio + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + keywords = ['可莉可莉'] + keyword_paths = [r"picovoice_models/可莉可莉_zh_raspberry-pi_v3_0_0.ppn"] + model_path = r"picovoice_models/porcupine_params_zh.pv" + + hd_trigger = 'button' + player = 'maixsense' + elif board == 'orangepi': + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + hd_trigger = 'button' + + mircophone_device = 2 + speaker_device = 2 + + else: + + keywords = ['hey google', 'ok google'] + keyword_paths = None + model_path = None + + hd_trigger = 'keyboard' + player = 'opencv' + + + import argparse + parser = argparse.ArgumentParser() + # server params + + + # recorder paramters + parser.add_argument('--voice_trigger', type=bool, default=True, help='Voice trigger') + parser.add_argument('--access_key',default=ACCESS_KEY, + help='AccessKey obtained from Picovoice Console (https://console.picovoice.ai/)') + parser.add_argument('--keywords',nargs='+',choices=sorted(pvporcupine.KEYWORDS),type=list, + default=keywords, + help='List of default keywords for detection. Available keywords: %s' % ', '.join( + '%s' % w for w in sorted(pvporcupine.KEYWORDS)),metavar='') + parser.add_argument('--keyword_paths',nargs='+', + default=keyword_paths, + help="Absolute paths to keyword model files. If not set it will be populated from `--keywords` argument") + parser.add_argument('--library_path',default=None, + help='Absolute path to dynamic library. Default: using the library provided by `pvporcupine`') + parser.add_argument('--model_path', + default=model_path, + help='Absolute path to the file containing model parameters. ' + 'Default: using the library provided by `pvporcupine`') + parser.add_argument('--sensitivities',type=float, + default=0.9, + help="Sensitivities for detecting keywords. Each value should be a number within [0, 1]. A higher " + "sensitivity results in fewer misses at the cost of increasing the false alarm rate. 
If not set 0.5 " + "will be used.") + parser.add_argument('--hd_trigger', type=str, + # default='keyboard', + default=hd_trigger, + help='Hardware trigger') + parser.add_argument('--keyboard_key', type=str, default='space', help='Keyboard key') + + # recorder paramters + parser.add_argument('--IN_CHUNK', type=int, default=3840, help='Record chunk size') # 原来的 + parser.add_argument('--IN_RATE', type=int, default=16000, help='Audio rate') + parser.add_argument('--IN_FORMAT', type=int, default=16, help='Audio format') + parser.add_argument('--IN_CHANNELS', type=int, default=1, help='Audio channels') + parser.add_argument('--IN_filename', type=str, default=None, help='Audio file name') + parser.add_argument('--IN_frames_per_buffer', type=int, default=512, help='Frames per buffer') + parser.add_argument('--min_stream_record_time', type=int, default=0.8, help='Min stream record time, sec') + parser.add_argument('--mircophone_device', type=int, default=mircophone_device, help='Microphone device index') + + # player paramters + parser.add_argument('--speaker_device', type=int, default=speaker_device, help='Speaker device index') + parser.add_argument('--OUT_CHUNK', type=int, default=1024, help='Player chunk size') + parser.add_argument('--OUT_RATE', type=int, default=22050, help='Player audio rate') + parser.add_argument('--OUT_FORMAT', type=int, default=16, help='Player audio format') + parser.add_argument('--OUT_CHANNELS', type=int, default=1, help='Player audio channels') + parser.add_argument('--OUT_filename', type=str, default=None, help='Player audio file name') + parser.add_argument('--OUT_frames_per_buffer', type=int, default=1024, help='Player frames per buffer') + + # log paramters + parser.add_argument('--log_file', type=str, default='ws_client.log', help='Log file') + parser.add_argument('--log_level', type=str, default='INFO', help='Log level') + + parser.add_argument('--debug', type=bool, default=False, help='Debug mode') + args = parser.parse_args() + + # sort out args and params + server_args = { + 'server_url': server_url, + 'session_id': session_id, + } + + recorder_args = { + 'board': board, + 'access_key': args.access_key, + 'keywords': args.keywords, + 'keyword_paths': args.keyword_paths, + 'library_path': args.library_path, + 'model_path': args.model_path, + 'sensitivities': args.sensitivities, + 'hd_trigger': args.hd_trigger, + 'keyboard_key': args.keyboard_key, + 'voice_trigger': args.voice_trigger, + 'CHUNK': args.IN_CHUNK, + 'FORMAT': pyaudio.paInt16 if args.IN_FORMAT == 16 else pyaudio.paInt32, + 'CHANNELS': args.IN_CHANNELS, + 'RATE': args.IN_RATE, + 'min_stream_record_time': args.min_stream_record_time, + 'input_device_index': args.mircophone_device, + 'frames_per_buffer': args.IN_frames_per_buffer, + } + + player_args = { + 'output_device_index': args.speaker_device, + 'CHUNK': args.OUT_CHUNK, + 'FORMAT': pyaudio.paInt16 if args.OUT_FORMAT == 16 else pyaudio.paInt32, + 'CHANNELS': args.OUT_CHANNELS, + 'RATE': args.OUT_RATE, + 'filename': args.OUT_filename, + 'frames_per_buffer': args.OUT_frames_per_buffer, + } + + log_args = { + 'log_file': args.log_file, + 'log_level': args.log_level, + } + + + localclient = WebSocketClinet( + board=board, + server_args=server_args, + recorder_args=recorder_args, + player_args=player_args, + excute_args=excute_args, + log_args=log_args) + localclient.process_init() \ No newline at end of file
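Both client scripts convert the integer --IN_FORMAT/--OUT_FORMAT flags into PyAudio sample-format constants with an inline conditional that silently maps every value other than 16 to 32-bit. A small sketch of a stricter mapping, plus the throughput arithmetic it implies (illustrative only; sample_format and bytes_per_second are hypothetical helpers, not part of takway):

import pyaudio

def sample_format(bit_depth: int):
    # Map a CLI bit depth onto the matching PyAudio constant,
    # failing loudly instead of silently defaulting to 32-bit.
    formats = {16: pyaudio.paInt16, 24: pyaudio.paInt24, 32: pyaudio.paInt32}
    try:
        return formats[bit_depth]
    except KeyError:
        raise ValueError(f"unsupported bit depth: {bit_depth}") from None

def bytes_per_second(rate: int, channels: int, bit_depth: int) -> int:
    # Raw PCM throughput of a stream; useful when sizing chunks and buffers.
    return rate * channels * (bit_depth // 8)

if __name__ == '__main__':
    fmt = sample_format(16)                 # pyaudio.paInt16, as used for the input stream
    print(bytes_per_second(16000, 1, 16))   # 32000 bytes/s for 16 kHz mono 16-bit capture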