include paddle-speech

UPDATE paddle-speech
2025-02-17 14:40:28 +08:00 · 2022-11-16 15:16:27 +08:00 · 2022-11-16 15:16:27 +08:00 · 43282761d9
commit 43282761d9
parent f8dc37ae34
15 changed files with 30 additions and 160 deletions
--- a/aihub/deep-learning/paddlespeech-asr/Dockerfile
+++ b/aihub/deep-learning/paddlespeech-asr/Dockerfile
@@ -1,5 +1,5 @@

-# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech  .
+# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech-asr  .
 FROM ccr.ccs.tencentyun.com/cube-studio/aihub:base-python3.8
 # 安装基础环境
 WORKDIR /
--- a/aihub/deep-learning/paddlespeech-asr/app.py
+++ b/aihub/deep-learning/paddlespeech-asr/app.py
@@ -25,13 +25,13 @@ class Speech_Asr_Model(Model):

    inference_inputs = [
        Field(type=Field_type.audio, name='voice_file_path', label='语音文件',
-              describe='可支持上传一个语音文件'),
+              describe='上传一个语音文件试试吧~'),
    ]
    web_examples = [
        {
            "label": "示例1",
            "input": {
-                "voice_file_path": "a.wav"
+                "voice_file_path": "zh.wav"
            }
        }
    ]
--- a/aihub/deep-learning/paddlespeech-asr/init.sh
+++ b/aihub/deep-learning/paddlespeech-asr/init.sh
@@ -4,3 +4,6 @@ conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
 pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
 pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
 pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
+cd /app/
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
--- a/aihub/deep-learning/paddlespeech-cls/Dockerfile
+++ b/aihub/deep-learning/paddlespeech-cls/Dockerfile
@@ -1,5 +1,5 @@

-# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech  .
+# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech-cls  .
 FROM ccr.ccs.tencentyun.com/cube-studio/aihub:base-python3.8
 # 安装基础环境
 WORKDIR /
--- a/aihub/deep-learning/paddlespeech-cls/app.py
+++ b/aihub/deep-learning/paddlespeech-cls/app.py
@@ -25,13 +25,13 @@ class Speech_Cls_Model(Model):

    inference_inputs = [
        Field(type=Field_type.audio, name='voice_file_path', label='语音文件',
-              describe='上传一个语音文件吧~'),
+              describe='上传一个语音文件试试吧~'),
    ]
    web_examples = [
        {
            "label": "示例1",
            "input": {
-                "voice_file_path": "a.wav"
+                "voice_file_path": "zh.wav"
            }
        }
    ]
--- a/aihub/deep-learning/paddlespeech-cls/init.sh
+++ b/aihub/deep-learning/paddlespeech-cls/init.sh
@@ -4,3 +4,6 @@ conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
 pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
 pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
 pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
+cd /app/
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
--- a/aihub/deep-learning/paddlespeech-st/Dockerfile
+++ b/aihub/deep-learning/paddlespeech-st/Dockerfile
@@ -1,5 +1,5 @@

-# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech  .
+# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech-st  .
 FROM ccr.ccs.tencentyun.com/cube-studio/aihub:base-python3.8
 # 安装基础环境
 WORKDIR /
--- a/aihub/deep-learning/paddlespeech-st/app.py
+++ b/aihub/deep-learning/paddlespeech-st/app.py
@@ -31,7 +31,7 @@ class Speech_St_Model(Model):
        {
            "label": "示例1",
            "input": {
-                "voice_file_path": "a.wav"
+                "voice_file_path": "en.wav"
            }
        }
    ]
--- a/aihub/deep-learning/paddlespeech-st/init.sh
+++ b/aihub/deep-learning/paddlespeech-st/init.sh
@@ -4,3 +4,6 @@ conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
 pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
 pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
 pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
+cd /app/
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
--- a/aihub/deep-learning/paddlespeech-tts/Dockerfile
+++ b/aihub/deep-learning/paddlespeech-tts/Dockerfile
@@ -1,5 +1,5 @@

-# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech  .
+# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech-tts  .
 FROM ccr.ccs.tencentyun.com/cube-studio/aihub:base-python3.8
 # 安装基础环境
 WORKDIR /
--- a/aihub/deep-learning/paddlespeech-tts/app.py
+++ b/aihub/deep-learning/paddlespeech-tts/app.py
@@ -32,15 +32,18 @@ class Speech_Tts_Model(Model):
    inference_inputs = [
        Field(type=Field_type.text, name='text', label='语音转文本',
              describe='输入文本', default='cube studio 是个云原生一站式机器学习平台，欢迎大家体验！'),
-        Field(type=Field_type.text, name='spk_id', label='说话人ID',
-              describe='0-283可选，不一样的ID会带来不一样的声音', default=0),
+        # Field(type=Field_type.text, name='spk_id', label='说话人ID',
+        #       describe='0-283可选，不一样的ID会带来不一样的声音', default=0),
+        Field(type=Field_type.text_select, name='spk_id', label='说话人', default='0',
+              choices=['1', '2', '3', '4', '5'])
+
    ]
    web_examples = [
        {
            "label": "示例1",
            "input": {
                "text": "今天天气不错",
-                "spk_id": 0,
+                "spk_id": '0',
            }
        }
    ]
@@ -57,19 +60,12 @@ class Speech_Tts_Model(Model):
    @pysnooper.snoop()
    def inference(self, text, spk_id=0):
        tts = self.tts
-        file_name = f"{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-output.wav"
+        os.makedirs('result', exist_ok=True)
        if spk_id:
-            if str(spk_id).isdigit():
-                if 0 <= int(spk_id) <= 283:
-                    spk_id = int(spk_id)
-                else:
-                    back = [{'text': '警告：数字需要在0~283之间哦~'}]
-                    return back
-            else:
-                back = [{'text': '警告：说话人仅支持数字哦~'}]
-                return back
+            spk_id = int(spk_id) - 1 if int(spk_id) > 0 else int(spk_id)
        else:
            spk_id = 0
+        file_name = f"result/{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-output.wav"
        tts(text=text, output=file_name, am='fastspeech2_mix', voc='hifigan_csmsc', lang='mix', spk_id=spk_id)
        back = [
            {
--- a/aihub/deep-learning/paddlespeech-tts/init.sh
+++ b/aihub/deep-learning/paddlespeech-tts/init.sh
@@ -4,3 +4,6 @@ conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
 pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
 pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
 pip install paddlespeech -i https://pypi.tuna.tsinghua.edu.cn/simple
+cd /app/
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/zh.wav
+wget -c https://paddlespeech.bj.bcebos.com/PaddleAudio/en.wav
--- a/aihub/deep-learning/paddlespeech/Dockerfile
+++ b/aihub/deep-learning/paddlespeech/Dockerfile
@@ -1,12 +0,0 @@
-
-# 构建应用镜像 docker build -t ccr.ccs.tencentyun.com/cube-studio/aihub:paddlespeech  .
-FROM ccr.ccs.tencentyun.com/cube-studio/aihub:base-python3.8
-# 安装基础环境
-WORKDIR /
-COPY init.sh /init.sh
-RUN bash /init.sh
-# 安装文件
-WORKDIR /app
-COPY * /app/
-ENTRYPOINT ["python", "app.py"]
-
--- a/aihub/deep-learning/paddlespeech/app-.py
+++ b/aihub/deep-learning/paddlespeech/app-.py
@@ -1,118 +0,0 @@
-import base64
-import io, sys, os
-from cubestudio.aihub.model import Model
-from cubestudio.aihub.docker import Docker
-from cubestudio.aihub.web.server import Server, Field, Field_type
-
-import pysnooper
-import os
-import argparse, os, re
-import datetime
-import time
-
-# speech所需内容，可根据需要注释无需的加载项
-# 语音识别
-from paddlespeech.cli.asr.infer import ASRExecutor
-# 语音合成
-from paddlespeech.cli.tts.infer import TTSExecutor
-# 声音场景分类
-from paddlespeech.cli.cls.infer import CLSExecutor
-# 语音翻译
-from paddlespeech.cli.st.infer import STExecutor
-
-
-class Speech_Model(Model):
-    # 模型基础信息定义
-    name = 'paddle-speech'
-    label = '语音处理'
-    describe = "涵盖功能有语音转文字，文字转语音，语音翻译，语音场景识别"
-    field = "智能识别"
-    scenes = "语音处理"
-    status = 'online'
-    version = 'v20221114'
-    doc = 'https://github.com/PaddlePaddle/PaddleSpeech'  # 'https://帮助文档的链接地址'
-    # pic = 'https://images.nightcafe.studio//assets/stable-tile.jpg'  # https://应用描述的缩略图/可以直接使用应用内的图片文件地址
-
-    inference_resource = {
-        "resource_gpu": "1"
-    }
-
-    inference_inputs = [
-        Field(type=Field_type.text, name='text', label='语音合成文本',
-              describe='输入文本', default='cube studio 是个云原生一站式机器学习平台，欢迎大家体验！'),
-        Field(type=Field_type.audio, name='voice', label='语音文件',
-              describe='可支持上传一个语音文件，在下方选择一种想要进行的操作吧~'),
-    ]
-    web_examples = [
-        {
-            "label": "语音合成",
-            "data": {
-                "work": ['语音分类', '语音识别', '...'],
-                "text": '今天天气不错',
-                "name": 'a.wav',
-            }
-        }
-    ]
-
-    def __init__(self):
-        self.st = None
-        self.asr = None
-        self.tts = None
-        self.cls = None
-
-    # 加载模型
-    # @pysnooper.snoop()
-    def load_model(self):
-        self.cls = CLSExecutor()  # 语音分类
-        self.tts = TTSExecutor()  # 语音合成
-        self.asr = ASRExecutor()  # 语音识别
-        self.st = STExecutor()  # 语音翻译
-
-    # 推理
-    @pysnooper.snoop()
-    def inference(self, **kwargs):
-        """
-        kwargs例子：
-            {
-            "label": "语音合成",
-            "data": {
-                "work": ['语音分类', '语音识别', '...'],
-                "text": '今天天气不错',
-                "file_name": 'a.wav',
-                }
-            }
-        """
-        result = ''
-        file_PATH = ''
-        if kwargs['label'] == '语音合成':
-            tts = self.tts
-            file_name = f"{kwargs['data']['name']}-{datetime.datetime.now().strftime('%Y%m%d%H%M%S')}-output.wav"
-            tts(text=kwargs['data']['text'], output=file_name)
-            file_PATH = file_name
-        elif kwargs['label'] == '语音内容识别':
-            # file = open(kwargs['data']['name'], "wb")  # 写入二进制文件
-            # text = base64.b64decode(kwargs['data']['text'])  # 进行解码
-            # file.write(text)
-            if '语音分类' in kwargs['data']['works']:
-                cls = self.cls
-                result += '语音分类结果： '
-                result += cls(audio_file=kwargs['data']['file_name']) + '\r\n'
-            if '语音识别' in kwargs['data']['works']:
-                asr = self.asr
-                result += '语音识别结果： '
-                result += asr(audio_file=kwargs['data']['file_name']) + '\r\n'
-            if '语音翻译' in kwargs['data']['works']:
-                st = self.st
-                result += '语音翻译结果： '
-                result += ''.join(st(audio_file=kwargs['data']['file_name'])) + '\r\n'
-        return file_PATH, result
-
-
-model = Speech_Model()
-# model.load_model()
-# result = model.inference(prompt='a photograph of an astronaut riding a horse',device='cpu')  # 测试
-# print(result)
-
-# 启动服务
-server = Server(model=model)
-server.server(port=8080)
--- a/aihub/deep-learning/paddlespeech/init.sh
+++ b/aihub/deep-learning/paddlespeech/init.sh
@@ -1,8 +0,0 @@
-apt install build-essential
-git clone https://github.com/PaddlePaddle/PaddleSpeech.git
-cd PaddleSpeech
-
-conda install -y -c conda-forge sox libsndfile swig bzip2 libflac bc
-pip install pytest-runner -i https://pypi.tuna.tsinghua.edu.cn/simple
-pip install paddlepaddle-gpu -i https://mirror.baidu.com/pypi/simple
-pip install -e .