PaddlePaddle
diff --git a/deep_speech_2/README.md b/deep_speech_2/README.md
old mode 100755
new mode 100644
Lines changed: 2 additions & 0 deletions
diff --git a/deep_speech_2/cloud/README.md b/deep_speech_2/cloud/README.md
old mode 100755
new mode 100644
diff --git a/deep_speech_2/cloud/pcloud_submit.sh b/deep_speech_2/cloud/pcloud_submit.sh
Lines changed: 5 additions & 3 deletions
diff --git a/deep_speech_2/cloud/pcloud_train.sh b/deep_speech_2/cloud/pcloud_train.sh
Lines changed: 26 additions & 6 deletions
diff --git a/deep_speech_2/cloud/pcloud_upload_data.sh b/deep_speech_2/cloud/pcloud_upload_data.sh
Lines changed: 7 additions & 2 deletions
diff --git a/deep_speech_2/datasets/vocab/eng_vocab.txt b/deep_speech_2/data/librispeech/eng_vocab.txt
rename from deep_speech_2/datasets/vocab/eng_vocab.txt
rename to deep_speech_2/data/librispeech/eng_vocab.txt
diff --git a/deep_speech_2/datasets/librispeech/librispeech.py b/deep_speech_2/data/librispeech/librispeech.py
rename from deep_speech_2/datasets/librispeech/librispeech.py
rename to deep_speech_2/data/librispeech/librispeech.py
diff --git a/deep_speech_2/datasets/noise/chime3_background.py b/deep_speech_2/data/noise/chime3_background.py
rename from deep_speech_2/datasets/noise/chime3_background.py
rename to deep_speech_2/data/noise/chime3_background.py
diff --git a/deep_speech_2/data_utils/augmentor/impulse_response.py b/deep_speech_2/data_utils/augmentor/impulse_response.py
Lines changed: 4 additions & 5 deletions
diff --git a/deep_speech_2/data_utils/augmentor/noise_perturb.py b/deep_speech_2/data_utils/augmentor/noise_perturb.py
Lines changed: 4 additions & 5 deletions
@@ -1,5 +1,7 @@
 # DeepSpeech2 on PaddlePaddle
 
+> TODO: This document needs to be updated, since the directory hierarchy has changed.
+
 ## Installation
 
 ```
 
@@ -1,7 +1,9 @@
-TRAIN_MANIFEST="cloud/cloud.manifest.train"
-DEV_MANIFEST="cloud/cloud.manifest.dev"
+#! /usr/bin/bash
+
+TRAIN_MANIFEST="cloud/cloud_manifests/cloud.manifest.train"
+DEV_MANIFEST="cloud/cloud_manifests/cloud.manifest.dev"
 CLOUD_MODEL_DIR="./checkpoints"
-BATCH_SIZE=256
+BATCH_SIZE=512
 NUM_GPU=8
 NUM_NODE=1
 IS_LOCAL="True"
 
@@ -1,3 +1,5 @@
+#! /usr/bin/bash
+
 TRAIN_MANIFEST=$1
 DEV_MANIFEST=$2
 MODEL_PATH=$3
@@ -14,11 +16,29 @@ python ./cloud/split_data.py \
 --out_manifest_path='/local.manifest.dev'
 
 python -u train.py \
---batch_size=$BATCH_SIZE \
---use_gpu=1 \
+--batch_size=${BATCH_SIZE} \
 --trainer_count=${NUM_GPU} \
---num_threads_data=${NUM_GPU} \
+--num_passes=200 \
+--num_proc_data=${NUM_GPU} \
+--num_conv_layers=2 \
+--num_rnn_layers=3 \
+--rnn_layer_size=2048 \
+--num_iter_print=100 \
+--learning_rate=5e-4 \
+--max_duration=27.0 \
+--min_duration=0.0 \
+--use_sortagrad=True \
+--use_gru=False \
+--use_gpu=True \
 --is_local=${IS_LOCAL} \
---train_manifest_path='/local.manifest.train' \
---dev_manifest_path='/local.manifest.dev' \
---output_model_dir=${MODEL_PATH} 2>&1 | tee ./log/train.log
+--share_rnn_weights=True \
+--train_manifest='/local.manifest.train' \
+--dev_manifest='/local.manifest.dev' \
+--mean_std_path='data/librispeech/mean_std.npz' \
+--vocab_path='data/librispeech/eng_vocab.txt' \
+--output_model_dir='./checkpoints' \
+--output_model_dir=${MODEL_PATH} \
+--augment_conf_path='conf/augmentation.config' \
+--specgram_type='linear' \
+--shuffle_method='batch_shuffle_clipped' \
+2>&1 | tee ./log/train.log
@@ -1,5 +1,9 @@
-IN_MANIFESTS="../datasets/manifest.train ../datasets/manifest.dev ../datasets/manifest.test"
-OUT_MANIFESTS="./cloud.manifest.train ./cloud.manifest.dev ./cloud.manifest.test"
+#! /usr/bin/bash
+
+mkdir cloud_manifests
+
+IN_MANIFESTS="../data/librispeech/manifest.train ../data/librispeech/manifest.dev-clean ../data/librispeech/manifest.test-clean"
+OUT_MANIFESTS="cloud_manifests/cloud.manifest.train cloud_manifests/cloud.manifest.dev cloud_manifests/cloud.manifest.test"
 CLOUD_DATA_DIR="/pfs/dlnel/home/USERNAME/deepspeech2/data/librispeech"
 NUM_SHARDS=50
 
@@ -14,4 +18,5 @@ then
     echo "Upload Data Failed!"
     exit 1
 fi
+
 echo "All Done."
@@ -4,23 +4,22 @@
 from __future__ import print_function
 
 from data_utils.augmentor.base import AugmentorBase
-from data_utils import utils
+from data_utils.utility import read_manifest
 from data_utils.audio import AudioSegment
 
 
 class ImpulseResponseAugmentor(AugmentorBase):
     """Augmentation model for adding impulse response effect.
-    
+
     :param rng: Random generator object.
     :type rng: random.Random
     :param impulse_manifest_path: Manifest path for impulse audio data.
-    :type impulse_manifest_path: basestring 
+    :type impulse_manifest_path: basestring
     """
 
     def __init__(self, rng, impulse_manifest_path):
         self._rng = rng
-        self._impulse_manifest = utils.read_manifest(
-            manifest_path=impulse_manifest_path)
+        self._impulse_manifest = read_manifest(impulse_manifest_path)
 
     def transform_audio(self, audio_segment):
         """Add impulse response effect.
 
@@ -4,29 +4,28 @@
 from __future__ import print_function
 
 from data_utils.augmentor.base import AugmentorBase
-from data_utils import utils
+from data_utils.utility import read_manifest
 from data_utils.audio import AudioSegment
 
 
 class NoisePerturbAugmentor(AugmentorBase):
     """Augmentation model for adding background noise.
-    
+
     :param rng: Random generator object.
     :type rng: random.Random
     :param min_snr_dB: Minimal signal noise ratio, in decibels.
     :type min_snr_dB: float
     :param max_snr_dB: Maximal signal noise ratio, in decibels.
     :type max_snr_dB: float
     :param noise_manifest_path: Manifest path for noise audio data.
-    :type noise_manifest_path: basestring 
+    :type noise_manifest_path: basestring
     """
 
     def __init__(self, rng, min_snr_dB, max_snr_dB, noise_manifest_path):
         self._min_snr_dB = min_snr_dB
         self._max_snr_dB = max_snr_dB
         self._rng = rng
-        self._noise_manifest = utils.read_manifest(
-            manifest_path=noise_manifest_path)
+        self._noise_manifest = read_manifest(manifest_path=noise_manifest_path)
 
     def transform_audio(self, audio_segment):
         """Add background noise audio.