Commit 8f10929

clean code and add comments.
1 parent 431f46f commit 8f10929

11 files changed: 736 additions and 192 deletions
Lines changed: 0 additions & 1 deletion

@@ -1,3 +1,2 @@
-data
 *.txt
 *.pyc
Lines changed: 55 additions & 1 deletion

@@ -1 +1,55 @@
-TBD
+# Globally Normalized Reader
+
+This model implements the work in the following paper:
+
+Jonathan Raiman and John Miller. Globally Normalized Reader. Empirical Methods in Natural Language Processing (EMNLP), 2017.
+
+If you use the dataset/code in your research, please cite the above paper:
+
+```text
+@inproceedings{raiman2015gnr,
+    author={Raiman, Jonathan and Miller, John},
+    booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
+    title={Globally Normalized Reader},
+    year={2017},
+}
+```
+
+You can also visit https://github.com/baidu-research/GloballyNormalizedReader for more information.
+
+# Installation
+
+1. Please use the [Docker image](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) to install the latest PaddlePaddle by running:
+```bash
+docker pull paddledev/paddle
+```
+2. Download all the necessary data by running:
+```bash
+cd data && ./download.sh
+```
+3. Featurize the data by running:
+```bash
+python featurize.py --datadir data --outdir featurized
+```
+
+# Training a Model
+
+- Configure the model by modifying `config.py` if needed, and then run:
+
+```bash
+python train.py 2>&1 | tee train.log
+```
+
+# Inference with a Trained Model
+
+- Run inference with a trained model:
+```bash
+python infer.py \
+    --model_path models/pass_00000.tar.gz \
+    --data_dir data/featurized/ \
+    --batch_size 2 \
+    --use_gpu 0 \
+    --trainer_count 1 \
+    2>&1 | tee infer.log
+```
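The training step in the README above says to adjust `config.py` before running `train.py`, but that file is not part of this diff. The following is only a hypothetical sketch of the kind of settings such a config might gather, reusing names that do appear elsewhere in this commit (`hidden_dim`, `depth`, `drop_rate`, `batch_size`, `use_gpu`, `trainer_count`); the real `config.py` may look entirely different.

```python
# Hypothetical sketch of a config.py -- NOT the file shipped with this commit.
# Every value below is an assumption for illustration only.


class ModelConfig(object):
    """Illustrative model hyper-parameters."""
    vocab_size = None   # to be filled in from the featurized data
    emb_dim = 128       # embedding size (assumed)
    hidden_dim = 128    # LSTM hidden-state size, see basic_modules.py
    depth = 2           # number of stacked bi-directional LSTM layers
    drop_rate = 0.1     # dropout applied to LSTM outputs


class TrainConfig(object):
    """Illustrative training settings."""
    batch_size = 2      # matches the --batch_size value in the README example
    use_gpu = False     # matches --use_gpu 0
    trainer_count = 1   # matches --trainer_count 1
    num_passes = 10     # assumed number of training passes
```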

globally_normalized_reader/basic_modules.py

Lines changed: 68 additions & 22 deletions
@@ -1,17 +1,50 @@
 #!/usr/bin/env python
 #coding=utf-8
+
 import collections
 
 import paddle.v2 as paddle
 from paddle.v2.layer import parse_network
 
 __all__ = [
     "stacked_bidirectional_lstm",
+    "stacked_bidirectional_lstm_by_nested_seq",
     "lstm_by_nested_sequence",
 ]
 
 
-def stacked_bidirectional_lstm(inputs, size, depth, drop_rate=0., prefix=""):
+def stacked_bidirectional_lstm(inputs,
+                               hidden_dim,
+                               depth,
+                               drop_rate=0.,
+                               prefix=""):
+    """The stacked bi-directional LSTM.
+
+    In PaddlePaddle, recurrent layers have two different implementations:
+    1. A recurrent layer implemented by recurrent_group: any intermediate
+       state a recurrent unit computes during one time step, such as hidden
+       states, the input-to-hidden mapping, memory cells and so on, is
+       accessible.
+    2. A recurrent layer as a whole: only the outputs of the recurrent layer
+       are accessible.
+
+    The second type (the recurrent layer as a whole) is more computationally
+    efficient, because recurrent_group is made up of many basic layers
+    (including additions, element-wise multiplications, matrix
+    multiplications and so on).
+
+    This function uses the second type to implement the stacked
+    bi-directional LSTM.
+
+    Arguments:
+        - inputs:     The input layer(s) to the bi-directional LSTM.
+        - hidden_dim: The dimension of the LSTM hidden state.
+        - depth:      The depth of the stacked bi-directional LSTM.
+        - drop_rate:  The dropout rate applied to the LSTM output states.
+        - prefix:     A string added to the name of each layer created in
+                      this function. Each layer in a network must have a
+                      unique name, so the prefix allows this function to be
+                      called multiple times.
+    """
+
     if not isinstance(inputs, collections.Sequence):
         inputs = [inputs]
 
@@ -20,7 +53,7 @@ def stacked_bidirectional_lstm(inputs, size, depth, drop_rate=0., prefix=""):
         for i in range(depth):
             input_proj = paddle.layer.mixed(
                 name="%s_in_proj_%0d_%s__" % (prefix, i, dirt),
-                size=size * 4,
+                size=hidden_dim * 4,
                 bias_attr=paddle.attr.Param(initial_std=0.),
                 input=[paddle.layer.full_matrix_projection(lstm)] if i else [
                     paddle.layer.full_matrix_projection(in_layer)
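The two hunks above define and use `stacked_bidirectional_lstm`. For reference, here is a minimal usage sketch; it is not part of this commit, the vocabulary size, embedding size, and layer names are illustrative assumptions, and the import assumes the script sits next to `globally_normalized_reader/basic_modules.py`.

```python
# Hypothetical smoke test, not part of this commit: encode a word sequence
# with the stacked bi-directional LSTM defined in basic_modules.py.
import paddle.v2 as paddle
from paddle.v2.layer import parse_network

from basic_modules import stacked_bidirectional_lstm

vocab_size = 1024  # illustrative value
emb_dim = 128      # illustrative value

# A plain integer word-id sequence, mapped to embeddings.
words = paddle.layer.data(
    name="word",
    type=paddle.data_type.integer_value_sequence(vocab_size))
embedding = paddle.layer.embedding(input=words, size=emb_dim)

# Stack two bi-directional LSTM layers over the embeddings.
encoded = stacked_bidirectional_lstm(
    inputs=embedding, hidden_dim=128, depth=2, drop_rate=0.1, prefix="__enc")

# Print the parsed network configuration as a quick sanity check.
print(parse_network(encoded))
```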
@@ -45,8 +78,8 @@
 
 
 def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
-    '''
-    This is a LSTM implemented by a nested recurrent_group.
+    """This is a LSTM implemented by a nested recurrent_group.
+
     A paragraph is a natural nested sequence:
     1. each paragraph is a sequence of sentences.
     2. each sentence is a sequence of words.
@@ -60,7 +93,14 @@ def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
     5. Consequently, this function is just equivalent to concatenating all
        sentences in a paragraph into one (long) sentence, and using one LSTM
        to encode this new long sentence.
-    '''
+
+    Arguments:
+        - input_layer: The input layer to the bi-directional LSTM.
+        - hidden_dim:  The dimension of the LSTM hidden state.
+        - name:        The name of the bi-directional LSTM.
+        - reverse:     A boolean indicating whether to process the input
+                       sequence in reverse order.
+    """
 
     def lstm_outer_step(lstm_group_input, hidden_dim, reverse, name=''):
         outer_memory = paddle.layer.memory(
@@ -71,9 +111,8 @@ def lstm_inner_step(input_layer, hidden_dim, reverse, name):
                 name="__inner_state_%s__" % name,
                 size=hidden_dim,
                 boot_layer=outer_memory)
-            input_proj = paddle.layer.fc(size=hidden_dim * 4,
-                                         bias_attr=False,
-                                         input=input_layer)
+            input_proj = paddle.layer.fc(
+                size=hidden_dim * 4, bias_attr=False, input=input_layer)
             return paddle.networks.lstmemory_unit(
                 input=input_proj,
                 name="__inner_state_%s__" % name,
@@ -111,7 +150,27 @@ def lstm_inner_step(input_layer, hidden_dim, reverse, name):
         reverse=reverse)
 
 
-def stacked_bi_lstm_by_nested_seq(input_layer, depth, hidden_dim, prefix=""):
+def stacked_bidirectional_lstm_by_nested_seq(input_layer,
+                                             depth,
+                                             hidden_dim,
+                                             prefix=""):
+    """The stacked bi-directional LSTM to process a nested sequence.
+
+    The module defined in this function is exactly equivalent to the one
+    defined in stacked_bidirectional_lstm; the only difference is that the
+    bi-directional LSTM defined here is implemented with recurrent_group in
+    PaddlePaddle and receives a nested sequence as its input.
+
+    Arguments:
+        - input_layer: The input layer to the bi-directional LSTM.
+        - hidden_dim:  The dimension of the LSTM hidden state.
+        - depth:       The depth of the stacked bi-directional LSTM.
+        - prefix:      A string added to the name of each layer created in
+                       this function. Each layer in a network must have a
+                       unique name, so the prefix allows this function to be
+                       called multiple times.
+    """
+
     lstm_final_outs = []
     for dirt in ["fwd", "bwd"]:
         for i in range(depth):
@@ -122,16 +181,3 @@ def stacked_bi_lstm_by_nested_seq(input_layer, depth, hidden_dim, prefix=""):
                 reverse=(dirt == "bwd"))
             lstm_final_outs.append(lstm_out)
     return paddle.layer.concat(input=lstm_final_outs)
-
-
-if __name__ == "__main__":
-    vocab_size = 1024
-    emb_dim = 128
-    embedding = paddle.layer.embedding(
-        input=paddle.layer.data(
-            name="word",
-            type=paddle.data_type.integer_value_sub_sequence(vocab_size)),
-        size=emb_dim)
-    print(parse_network(
-        stacked_bi_lstm_by_nested_seq(
-            input_layer=embedding, depth=3, hidden_dim=128, prefix="__lstm")))
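The last hunk above deletes the module-level smoke test. A roughly equivalent sketch that calls the renamed `stacked_bidirectional_lstm_by_nested_seq` would look like the following; it is not part of the commit and reuses the illustrative sizes from the deleted block, with the import path assumed to be the module's own directory.

```python
# Sketch of the removed smoke test, updated for the renamed function.
# It builds a nested (paragraph -> sentence -> word) integer sequence,
# embeds the word ids, and prints the parsed network configuration.
import paddle.v2 as paddle
from paddle.v2.layer import parse_network

from basic_modules import stacked_bidirectional_lstm_by_nested_seq

vocab_size = 1024  # illustrative value, as in the deleted block
emb_dim = 128      # illustrative value, as in the deleted block

embedding = paddle.layer.embedding(
    input=paddle.layer.data(
        name="word",
        type=paddle.data_type.integer_value_sub_sequence(vocab_size)),
    size=emb_dim)

print(parse_network(
    stacked_bidirectional_lstm_by_nested_seq(
        input_layer=embedding, depth=3, hidden_dim=128, prefix="__lstm")))
```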
