Skip to content

Commit acea40b

Browse files
authored
Merge pull request #233 from xinghai-sun/dir_tree
Re-organize folder structure and hierarchy for DS2.
2 parents b56a548 + aa8a61b commit acea40b

40 files changed

+280
-129
lines changed

deep_speech_2/README.md

100755 → 100644
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,7 @@
11
# DeepSpeech2 on PaddlePaddle
22

3+
>TODO: to be updated, since the directory hierarchy was changed.
4+
35
## Installation
46

57
```

deep_speech_2/cloud/README.md

100755 → 100644
File mode changed.

deep_speech_2/cloud/pcloud_submit.sh

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
1-
TRAIN_MANIFEST="cloud/cloud.manifest.train"
2-
DEV_MANIFEST="cloud/cloud.manifest.dev"
1+
#! /usr/bin/bash
2+
3+
TRAIN_MANIFEST="cloud/cloud_manifests/cloud.manifest.train"
4+
DEV_MANIFEST="cloud/cloud_manifests/cloud.manifest.dev"
35
CLOUD_MODEL_DIR="./checkpoints"
4-
BATCH_SIZE=256
6+
BATCH_SIZE=512
57
NUM_GPU=8
68
NUM_NODE=1
79
IS_LOCAL="True"

deep_speech_2/cloud/pcloud_train.sh

Lines changed: 26 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
#! /usr/bin/bash
2+
13
TRAIN_MANIFEST=$1
24
DEV_MANIFEST=$2
35
MODEL_PATH=$3
@@ -14,11 +16,29 @@ python ./cloud/split_data.py \
1416
--out_manifest_path='/local.manifest.dev'
1517

1618
python -u train.py \
17-
--batch_size=$BATCH_SIZE \
18-
--use_gpu=1 \
19+
--batch_size=${BATCH_SIZE} \
1920
--trainer_count=${NUM_GPU} \
20-
--num_threads_data=${NUM_GPU} \
21+
--num_passes=200 \
22+
--num_proc_data=${NUM_GPU} \
23+
--num_conv_layers=2 \
24+
--num_rnn_layers=3 \
25+
--rnn_layer_size=2048 \
26+
--num_iter_print=100 \
27+
--learning_rate=5e-4 \
28+
--max_duration=27.0 \
29+
--min_duration=0.0 \
30+
--use_sortagrad=True \
31+
--use_gru=False \
32+
--use_gpu=True \
2133
--is_local=${IS_LOCAL} \
22-
--train_manifest_path='/local.manifest.train' \
23-
--dev_manifest_path='/local.manifest.dev' \
24-
--output_model_dir=${MODEL_PATH} 2>&1 | tee ./log/train.log
34+
--share_rnn_weights=True \
35+
--train_manifest='/local.manifest.train' \
36+
--dev_manifest='/local.manifest.dev' \
37+
--mean_std_path='data/librispeech/mean_std.npz' \
38+
--vocab_path='data/librispeech/eng_vocab.txt' \
39+
--output_model_dir='./checkpoints' \
40+
--output_model_dir=${MODEL_PATH} \
41+
--augment_conf_path='conf/augmentation.config' \
42+
--specgram_type='linear' \
43+
--shuffle_method='batch_shuffle_clipped' \
44+
2>&1 | tee ./log/train.log

deep_speech_2/cloud/pcloud_upload_data.sh

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,9 @@
1-
IN_MANIFESTS="../datasets/manifest.train ../datasets/manifest.dev ../datasets/manifest.test"
2-
OUT_MANIFESTS="./cloud.manifest.train ./cloud.manifest.dev ./cloud.manifest.test"
1+
#! /usr/bin/bash
2+
3+
mkdir cloud_manifests
4+
5+
IN_MANIFESTS="../data/librispeech/manifest.train ../data/librispeech/manifest.dev-clean ../data/librispeech/manifest.test-clean"
6+
OUT_MANIFESTS="cloud_manifests/cloud.manifest.train cloud_manifests/cloud.manifest.dev cloud_manifests/cloud.manifest.test"
37
CLOUD_DATA_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/data/librispeech"
48
NUM_SHARDS=50
59

@@ -14,4 +18,5 @@ then
1418
echo "Upload Data Failed!"
1519
exit 1
1620
fi
21+
1722
echo "All Done."
File renamed without changes.
File renamed without changes.
File renamed without changes.

deep_speech_2/data_utils/augmentor/impulse_response.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,23 +4,22 @@
44
from __future__ import print_function
55

66
from data_utils.augmentor.base import AugmentorBase
7-
from data_utils import utils
7+
from data_utils.utility import read_manifest
88
from data_utils.audio import AudioSegment
99

1010

1111
class ImpulseResponseAugmentor(AugmentorBase):
1212
"""Augmentation model for adding impulse response effect.
13-
13+
1414
:param rng: Random generator object.
1515
:type rng: random.Random
1616
:param impulse_manifest_path: Manifest path for impulse audio data.
17-
:type impulse_manifest_path: basestring
17+
:type impulse_manifest_path: basestring
1818
"""
1919

2020
def __init__(self, rng, impulse_manifest_path):
2121
self._rng = rng
22-
self._impulse_manifest = utils.read_manifest(
23-
manifest_path=impulse_manifest_path)
22+
self._impulse_manifest = read_manifest(impulse_manifest_path)
2423

2524
def transform_audio(self, audio_segment):
2625
"""Add impulse response effect.

deep_speech_2/data_utils/augmentor/noise_perturb.py

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -4,29 +4,28 @@
44
from __future__ import print_function
55

66
from data_utils.augmentor.base import AugmentorBase
7-
from data_utils import utils
7+
from data_utils.utility import read_manifest
88
from data_utils.audio import AudioSegment
99

1010

1111
class NoisePerturbAugmentor(AugmentorBase):
1212
"""Augmentation model for adding background noise.
13-
13+
1414
:param rng: Random generator object.
1515
:type rng: random.Random
1616
:param min_snr_dB: Minimal signal noise ratio, in decibels.
1717
:type min_snr_dB: float
1818
:param max_snr_dB: Maximal signal noise ratio, in decibels.
1919
:type max_snr_dB: float
2020
:param noise_manifest_path: Manifest path for noise audio data.
21-
:type noise_manifest_path: basestring
21+
:type noise_manifest_path: basestring
2222
"""
2323

2424
def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
2525
self._min_snr_dB = min_snr_dB
2626
self._max_snr_dB = max_snr_dB
2727
self._rng = rng
28-
self._noise_manifest = utils.read_manifest(
29-
manifest_path=noise_manifest_path)
28+
self._noise_manifest = read_manifest(manifest_path=noise_manifest_path)
3029

3130
def transform_audio(self, audio_segment):
3231
"""Add background noise audio.

0 commit comments

Comments
 (0)