From fdee2e75202d4e027c78d79052559a03a4290d14 Mon Sep 17 00:00:00 2001 From: killua4396 <1223086337@qq.com> Date: Wed, 22 May 2024 15:26:01 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E9=80=9A=E8=BF=87=E4=BE=9D=E8=B5=96?= =?UTF-8?q?=E6=B3=A8=E5=85=A5=E6=9D=A5=E8=8E=B7=E5=8F=96asr=E5=92=8Ctts?= =?UTF-8?q?=E5=AF=B9=E8=B1=A1?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- app/controllers/chat_controller.py | 19 +++++++------------ app/dependencies/asr.py | 11 +++++++++++ app/dependencies/tts.py | 11 +++++++++++ requirements.txt | 7 ++++++- 4 files changed, 35 insertions(+), 13 deletions(-) create mode 100644 app/dependencies/asr.py create mode 100644 app/dependencies/tts.py diff --git a/app/controllers/chat_controller.py b/app/controllers/chat_controller.py index 8a7c51c..17f2a85 100644 --- a/app/controllers/chat_controller.py +++ b/app/controllers/chat_controller.py @@ -1,6 +1,8 @@ from ..schemas.chat_schema import * from ..dependencies.logger import get_logger from ..dependencies.summarizer import get_summarizer +from ..dependencies.asr import get_asr +from ..dependencies.tts import get_tts from .controller_enum import * from ..models import UserCharacter, Session, Character, User from utils.audio_utils import VAD @@ -19,21 +21,14 @@ logger = get_logger() # 依赖注入获取context总结服务 summarizer = get_summarizer() -# --------------------初始化本地ASR----------------------- -from utils.stt.modified_funasr import ModifiedRecognizer - -asr = ModifiedRecognizer() -logger.info("本地ASR初始化成功") +# -----------------------获取ASR------------------------- +asr = get_asr() # ------------------------------------------------------- -# --------------------初始化本地VITS---------------------- -from utils.tts.vits_utils import TextToSpeech - -tts = TextToSpeech(device='cpu') -logger.info("本地TTS初始化成功") +# -------------------------TTS-------------------------- +tts = get_tts() # ------------------------------------------------------- - # 依赖注入获取Config Config = get_config() @@ -488,7 +483,7 @@ async def scl_llm_handler(ws,session_id,response_type,llm_info,tts_info,db,redis redis.set(session_id,json.dumps(session_content,ensure_ascii=False)) #更新session is_first = True llm_response = "" - if token_count > summarizer.max_token * 0.7: #如果llm返回的token数大于60%的最大token数,则进行文本摘要 + if token_count > summarizer.max_token * 0.7: #如果llm返回的token数大于70%的最大token数,则进行文本摘要 system_prompt = messages[0]['content'] summary = await summarizer.summarize(messages) events = user_info['events'] diff --git a/app/dependencies/asr.py b/app/dependencies/asr.py new file mode 100644 index 0000000..2fffb03 --- /dev/null +++ b/app/dependencies/asr.py @@ -0,0 +1,11 @@ +from utils.stt.modified_funasr import ModifiedRecognizer +from app.dependencies.logger import get_logger + +logger = get_logger() + +#初始化全局asr对象 +asr = ModifiedRecognizer() +logger.info("ASR初始化成功") + +def get_asr(): + return asr \ No newline at end of file diff --git a/app/dependencies/tts.py b/app/dependencies/tts.py new file mode 100644 index 0000000..616f82d --- /dev/null +++ b/app/dependencies/tts.py @@ -0,0 +1,11 @@ +from utils.tts.openvoice_utils import TextToSpeech +from app.dependencies.logger import get_logger + +logger = get_logger() + +#初始化全局tts对象 +tts = TextToSpeech(use_tone_convert=True,device='cuda') +logger.info("TTS初始化成功") + +def get_tts(): + return tts \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index cee01bd..f218acb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,4 +21,9 @@ apscheduler aiohttp faster_whisper whisper_timestamped -modelscope \ No newline at end of file +<<<<<<< Updated upstream +modelscope +======= +modelscope +wavmark +>>>>>>> Stashed changes