mirror of
https://github.com/babysor/MockingBird.git
synced 2024-11-21 01:13:57 +08:00
add docker support (#802)
* add docker support * 修复训练集解压问题 * 修复web.py启动问题 * 混合数据集训练参数
This commit is contained in:
parent
b402f9dbdf
commit
cd20d21f3d
4
.dockerignore
Normal file
4
.dockerignore
Normal file
@ -0,0 +1,4 @@
|
||||
*/saved_models
|
||||
!vocoder/saved_models/pretrained/**
|
||||
!encoder/saved_models/pretrained.pt
|
||||
/datasets
|
5
.gitignore
vendored
5
.gitignore
vendored
@ -18,4 +18,7 @@
|
||||
!vocoder/saved_models/pretrained/**
|
||||
!encoder/saved_models/pretrained.pt
|
||||
wavs
|
||||
log
|
||||
log
|
||||
!/docker-entrypoint.sh
|
||||
!/datasets_download/*.sh
|
||||
/datasets
|
17
Dockerfile
Normal file
17
Dockerfile
Normal file
@ -0,0 +1,17 @@
|
||||
# NOTE(review): `latest` is a floating tag, so a rebuild may silently pick up a
# different base image; consider pinning a specific pytorch/pytorch tag (or
# digest) once a known-good version has been confirmed.
FROM pytorch/pytorch:latest
|
||||
|
||||
# System packages, installed in a single layer:
#   aria2            - dataset downloader used by datasets_download/download.sh
#   build-essential  - compiler toolchain for Python packages built from source
#   ca-certificates  - listed explicitly because --no-install-recommends no
#                      longer pulls it in with aria2, and the dataset mirrors
#                      are served over HTTPS
#   ffmpeg           - audio tooling
#   parallel         - used by download.sh to verify checksums
# The apt package lists are removed in the same layer so they do not end up in
# the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        aria2 \
        build-essential \
        ca-certificates \
        ffmpeg \
        parallel \
    && rm -rf /var/lib/apt/lists/*
|
||||
|
||||
COPY ./requirements.txt /workspace/requirements.txt
|
||||
|
||||
# Install Python dependencies. The requirements file is referenced by the
# absolute path it was copied to, so this step does not depend on the base
# image's WORKDIR, and --no-cache-dir keeps pip's download cache out of the layer.
RUN pip install --no-cache-dir -r /workspace/requirements.txt \
    && pip install --no-cache-dir webrtcvad-wheels
|
||||
|
||||
COPY . /workspace
|
||||
|
||||
VOLUME [ "/datasets", "/workspace/synthesizer/saved_models/" ]
|
||||
|
||||
# Runtime defaults consumed by the container scripts:
#   DATASET_MIRROR      - selects datasets_download/<name>.txt as the aria2 URL list
#   FORCE_RETRAIN       - "true" makes the entrypoint download, preprocess and
#                         train even if synthesizer/saved_models is not empty
#   TRAIN_DATASETS      - space-separated dataset names, each passed to `pre.py -d`
#   TRAIN_SKIP_EXISTING - "true" adds --skip_existing to the pre.py invocation
ENV DATASET_MIRROR=default FORCE_RETRAIN=false TRAIN_DATASETS=aidatatang_200zh\ magicdata\ aishell3\ data_aishell TRAIN_SKIP_EXISTING=true
|
||||
|
||||
EXPOSE 8080
|
||||
|
||||
ENTRYPOINT [ "/workspace/docker-entrypoint.sh" ]
|
8
datasets_download/CN.txt
Normal file
8
datasets_download/CN.txt
Normal file
@ -0,0 +1,8 @@
|
||||
https://openslr.magicdatatech.com/resources/62/aidatatang_200zh.tgz
|
||||
out=download/aidatatang_200zh.tgz
|
||||
https://openslr.magicdatatech.com/resources/68/train_set.tar.gz
|
||||
out=download/magicdata.tgz
|
||||
https://openslr.magicdatatech.com/resources/93/data_aishell3.tgz
|
||||
out=download/aishell3.tgz
|
||||
https://openslr.magicdatatech.com/resources/33/data_aishell.tgz
|
||||
out=download/data_aishell.tgz
|
8
datasets_download/EU.txt
Normal file
8
datasets_download/EU.txt
Normal file
@ -0,0 +1,8 @@
|
||||
https://openslr.elda.org/resources/62/aidatatang_200zh.tgz
|
||||
out=download/aidatatang_200zh.tgz
|
||||
https://openslr.elda.org/resources/68/train_set.tar.gz
|
||||
out=download/magicdata.tgz
|
||||
https://openslr.elda.org/resources/93/data_aishell3.tgz
|
||||
out=download/aishell3.tgz
|
||||
https://openslr.elda.org/resources/33/data_aishell.tgz
|
||||
out=download/data_aishell.tgz
|
8
datasets_download/US.txt
Normal file
8
datasets_download/US.txt
Normal file
@ -0,0 +1,8 @@
|
||||
https://us.openslr.org/resources/62/aidatatang_200zh.tgz
|
||||
out=download/aidatatang_200zh.tgz
|
||||
https://us.openslr.org/resources/68/train_set.tar.gz
|
||||
out=download/magicdata.tgz
|
||||
https://us.openslr.org/resources/93/data_aishell3.tgz
|
||||
out=download/aishell3.tgz
|
||||
https://us.openslr.org/resources/33/data_aishell.tgz
|
||||
out=download/data_aishell.tgz
|
4
datasets_download/datasets.sha256sum
Normal file
4
datasets_download/datasets.sha256sum
Normal file
@ -0,0 +1,4 @@
|
||||
0c0ace77fe8ee77db8d7542d6eb0b7ddf09b1bfb880eb93a7fbdbf4611e9984b /datasets/download/aidatatang_200zh.tgz
|
||||
be2507d431ad59419ec871e60674caedb2b585f84ffa01fe359784686db0e0cc /datasets/download/aishell3.tgz
|
||||
a4a0313cde0a933e0e01a451f77de0a23d6c942f4694af5bb7f40b9dc38143fe /datasets/download/data_aishell.tgz
|
||||
1d2647c614b74048cfe16492570cc5146d800afdc07483a43b31809772632143 /datasets/download/magicdata.tgz
|
8
datasets_download/default.txt
Normal file
8
datasets_download/default.txt
Normal file
@ -0,0 +1,8 @@
|
||||
https://www.openslr.org/resources/62/aidatatang_200zh.tgz
|
||||
out=download/aidatatang_200zh.tgz
|
||||
https://www.openslr.org/resources/68/train_set.tar.gz
|
||||
out=download/magicdata.tgz
|
||||
https://www.openslr.org/resources/93/data_aishell3.tgz
|
||||
out=download/aishell3.tgz
|
||||
https://www.openslr.org/resources/33/data_aishell.tgz
|
||||
out=download/data_aishell.tgz
|
8
datasets_download/download.sh
Executable file
8
datasets_download/download.sh
Executable file
@ -0,0 +1,8 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -Eeuo pipefail
|
||||
|
||||
# Download every archive listed for the selected mirror into /datasets,
# resuming partial files. The path is quoted so that the DATASET_MIRROR
# expansion cannot be word-split or glob-expanded.
aria2c -x 10 --disable-ipv6 --input-file "/workspace/datasets_download/${DATASET_MIRROR}.txt" --dir /datasets --continue
|
||||
|
||||
echo "Verifying sha256sum..."
|
||||
# Each line of datasets.sha256sum is handed to its own `sha256sum -c` job, so
# GNU parallel can check the archives concurrently. With `set -e` in effect, a
# non-zero exit from parallel aborts this script.
parallel --will-cite -a /workspace/datasets_download/datasets.sha256sum "echo -n {} | sha256sum -c"
|
29
datasets_download/extract.sh
Executable file
29
datasets_download/extract.sh
Executable file
@ -0,0 +1,29 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
set -Eeuo pipefail
|
||||
|
||||
mkdir -p /datasets/aidatatang_200zh
|
||||
if [ -z "$(ls -A /datasets/aidatatang_200zh)" ] ; then
|
||||
tar xvz --directory /datasets/ -f /datasets/download/aidatatang_200zh.tgz --exclude 'aidatatang_200zh/corpus/dev/*' --exclude 'aidatatang_200zh/corpus/test/*'
|
||||
cd /datasets/aidatatang_200zh/corpus/train/
|
||||
cat *.tar.gz | tar zxvf - -i
|
||||
rm -f *.tar.gz
|
||||
fi
|
||||
|
||||
mkdir -p /datasets/magicdata
|
||||
if [ -z "$(ls -A /datasets/magicdata)" ] ; then
|
||||
tar xvz --directory /datasets/magicdata -f /datasets/download/magicdata.tgz train/
|
||||
fi
|
||||
|
||||
mkdir -p /datasets/aishell3
|
||||
if [ -z "$(ls -A /datasets/aishell3)" ] ; then
|
||||
tar xvz --directory /datasets/aishell3 -f /datasets/download/aishell3.tgz train/
|
||||
fi
|
||||
|
||||
mkdir -p /datasets/data_aishell
|
||||
if [ -z "$(ls -A /datasets/data_aishell)" ] ; then
|
||||
tar xvz --directory /datasets/ -f /datasets/download/data_aishell.tgz
|
||||
cd /datasets/data_aishell/wav/
|
||||
cat *.tar.gz | tar zxvf - -i --exclude 'dev/*' --exclude 'test/*'
|
||||
rm -f *.tar.gz
|
||||
fi
|
23
docker-compose.yml
Normal file
23
docker-compose.yml
Normal file
@ -0,0 +1,23 @@
|
||||
version: '3.8'
|
||||
|
||||
services:
|
||||
server:
|
||||
image: mockingbird:latest
|
||||
build: .
|
||||
volumes:
|
||||
- ./datasets:/datasets
|
||||
- ./synthesizer/saved_models:/workspace/synthesizer/saved_models
|
||||
environment:
|
||||
- DATASET_MIRROR=US
|
||||
- FORCE_RETRAIN=false
|
||||
- TRAIN_DATASETS=aidatatang_200zh magicdata aishell3 data_aishell
|
||||
- TRAIN_SKIP_EXISTING=true
|
||||
ports:
|
||||
- 8080:8080
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
devices:
|
||||
- driver: nvidia
|
||||
device_ids: [ '0' ]
|
||||
capabilities: [ gpu ]
|
17
docker-entrypoint.sh
Executable file
17
docker-entrypoint.sh
Executable file
@ -0,0 +1,17 @@
|
||||
#!/usr/bin/env bash
|
||||
|
||||
if [ -z "$(ls -A /workspace/synthesizer/saved_models)" ] || [ "$FORCE_RETRAIN" = true ] ; then
|
||||
/workspace/datasets_download/download.sh
|
||||
/workspace/datasets_download/extract.sh
|
||||
for DATASET in ${TRAIN_DATASETS}
|
||||
do
|
||||
# Preprocess one dataset. The optional --skip_existing flag is collected in an
# array so the pre.py command line is written only once, and all expansions
# are quoted to prevent word splitting and globbing.
PRE_ARGS=()
if [ "$TRAIN_SKIP_EXISTING" = true ] ; then
    PRE_ARGS+=(--skip_existing)
fi
python pre.py /datasets -d "${DATASET}" -n "$(nproc)" "${PRE_ARGS[@]}"
|
||||
done
|
||||
python synthesizer_train.py mandarin /datasets/SV2TTS/synthesizer
|
||||
fi
|
||||
|
||||
# Replace the shell with the web server so it runs as PID 1 and receives
# SIGTERM from `docker stop` directly instead of being killed after the timeout.
exec python web.py
|
@ -25,4 +25,8 @@ streamlit==1.8.0
|
||||
PyYAML==5.4.1
|
||||
torch_complex
|
||||
espnet
|
||||
PyWavelets
|
||||
PyWavelets
|
||||
fastapi
|
||||
loguru
|
||||
typer[all]
|
||||
click==8.0.4
|
Loading…
Reference in New Issue
Block a user