2 changes: 2 additions & 0 deletions globally_normalized_reader/.gitignore
@@ -0,0 +1,2 @@
*.txt
*.pyc
52 changes: 52 additions & 0 deletions globally_normalized_reader/README.md
@@ -0,0 +1,52 @@
# Globally Normalized Reader

This model implements the work in the following paper:

Jonathan Raiman and John Miller. Globally Normalized Reader. Empirical Methods in Natural Language Processing (EMNLP), 2017.

If you use the dataset/code in your research, please cite the above paper:

```text
@inproceedings{raiman2017gnr,
author={Raiman, Jonathan and Miller, John},
booktitle={Empirical Methods in Natural Language Processing (EMNLP)},
title={Globally Normalized Reader},
year={2017},
}
```

You can also visit https://github.com/baidu-research/GloballyNormalizedReader for more information.


# Installation

1. Use the PaddlePaddle [Docker image](http://doc.paddlepaddle.org/develop/doc/getstarted/build_and_install/docker_install_en.html) to install the latest PaddlePaddle by running:
```bash
docker pull paddledev/paddle
```
2. Download all necessary data by running:
```bash
cd data && ./download.sh
```
3. **(TODO) Add the preprocessing and featurizer scripts.**

# Training a Model

- Configure the model by modifying `config.py` if needed, then run:

```bash
python train.py 2>&1 | tee train.log
```

# Inferring with a Trained Model

- Run inference with a trained model by running:
```bash
python infer.py \
--model_path models/pass_00000.tar.gz \
--data_dir data/featurized/ \
--batch_size 2 \
--use_gpu 0 \
--trainer_count 1 \
2>&1 | tee infer.log
```
183 changes: 183 additions & 0 deletions globally_normalized_reader/basic_modules.py
@@ -0,0 +1,183 @@
#!/usr/bin/env python
#coding=utf-8

import collections

import paddle.v2 as paddle
from paddle.v2.layer import parse_network

__all__ = [
"stacked_bidirectional_lstm",
"stacked_bidirectional_lstm_by_nested_seq",
"lstm_by_nested_sequence",
]


def stacked_bidirectional_lstm(inputs,
hidden_dim,
depth,
drop_rate=0.,
prefix=""):
""" The stacked bi-directional LSTM.

In PaddlePaddle recurrent layers have two different implementations:
1. recurrent layer implemented by recurrent_group: any intermedia states a
recurent unit computes during one time step, such as hidden states,
input-to-hidden mapping, memory cells and so on, is accessable.
2. recurrent layer as a whole: only outputs of the recurrent layer are
accessable.

The second type (recurrent layer as a whole) is more computation efficient,
because recurrent_group is made up of many basic layers (including add,
element-wise multiplications, matrix multiplication and so on).

This function uses the second type to implement the stacked bi-directional
LSTM.

Arguments:
- inputs: The input layer to the bi-directional LSTM.
- hidden_dim: The dimension of the hidden state of the LSTM.
- depth: Depth of the stacked bi-directional LSTM.
- drop_rate: The drop rate to drop the LSTM output states.
- prefix: A string which will be appended to name of each layer
created in this function. Each layer in a network should
has a unique name. The prefix makes this fucntion can be
called multiple times.
"""

if not isinstance(inputs, collections.Sequence):
inputs = [inputs]

lstm_last = []
for dirt in ["fwd", "bwd"]:
for i in range(depth):
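            # Layer 0 projects the raw input layer(s); deeper layers project
            # the output of the previous LSTM layer in the stack.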
input_proj = paddle.layer.mixed(
name="%s_in_proj_%0d_%s__" % (prefix, i, dirt),
size=hidden_dim * 4,
bias_attr=paddle.attr.Param(initial_std=0.),
input=[paddle.layer.full_matrix_projection(lstm)] if i else [
paddle.layer.full_matrix_projection(in_layer)
for in_layer in inputs
])
lstm = paddle.layer.lstmemory(
input=input_proj,
bias_attr=paddle.attr.Param(initial_std=0.),
param_attr=paddle.attr.Param(initial_std=5e-4),
reverse=(dirt == "bwd"))
lstm_last.append(lstm)

final_states = paddle.layer.concat(input=[
paddle.layer.last_seq(input=lstm_last[0]),
paddle.layer.first_seq(input=lstm_last[1]),
])

lstm_outs = paddle.layer.concat(
input=lstm_last,
layer_attr=paddle.attr.ExtraLayerAttribute(drop_rate=drop_rate))
return final_states, lstm_outs
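

# ---------------------------------------------------------------------------
# Example usage (an illustrative sketch only, not part of the original
# module). The data-layer name, vocabulary size, and dimensions below are
# hypothetical placeholders chosen for demonstration.
def _demo_stacked_bidirectional_lstm(vocab_size=10000, embed_dim=128):
    # A plain (non-nested) sequence of word ids.
    words = paddle.layer.data(
        name="demo_words",
        type=paddle.data_type.integer_value_sequence(vocab_size))
    embedded = paddle.layer.embedding(input=words, size=embed_dim)
    # final_states is a sequence-level summary vector; lstm_outs holds the
    # per-step outputs of all stacked LSTM layers (with dropout applied).
    final_states, lstm_outs = stacked_bidirectional_lstm(
        inputs=embedded,
        hidden_dim=256,
        depth=2,
        drop_rate=0.1,
        prefix="demo")
    return final_states, lstm_outs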


def lstm_by_nested_sequence(input_layer, hidden_dim, name="", reverse=False):
"""This is a LSTM implemended by nested recurrent_group.

Paragraph is a nature nested sequence:
1. each paragraph is a sequence of sentence.
2. each sentence is a sequence of words.

This function ueses the nested recurrent_group to implement LSTM.
1. The outer group iterates over sentence in a paragraph.
2. The inner group iterates over words in a sentence.
3. A LSTM is used to encode sentence, its final outputs is used to
initialize memory of the LSTM that is used to encode the next sentence.
4. Parameters are shared among these sentence-encoding LSTMs.
5. Consequently, this function is just equivalent to concatenate all
sentences in a paragraph into one (long) sentence, and use one LSTM to
encode this new long sentence.

Arguments:
- input_layer: The input layer to the bi-directional LSTM.
- hidden_dim: The dimension of the hidden state of the LSTM.
- name: The name of the bi-directional LSTM.
- reverse: The boolean parameter indicating whether to prcess
the input sequence by the reverse order.
"""

def lstm_outer_step(lstm_group_input, hidden_dim, reverse, name=''):
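        # Holds the final hidden state of the previously encoded sentence;
        # it bootstraps the inner LSTM's memory for the current sentence.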
outer_memory = paddle.layer.memory(
name="__inner_%s_last__" % name, size=hidden_dim)

def lstm_inner_step(input_layer, hidden_dim, reverse, name):
inner_memory = paddle.layer.memory(
name="__inner_state_%s__" % name,
size=hidden_dim,
boot_layer=outer_memory)
input_proj = paddle.layer.fc(
size=hidden_dim * 4, bias_attr=False, input=input_layer)
return paddle.networks.lstmemory_unit(
input=input_proj,
name="__inner_state_%s__" % name,
out_memory=inner_memory,
size=hidden_dim,
act=paddle.activation.Tanh(),
gate_act=paddle.activation.Sigmoid(),
state_act=paddle.activation.Tanh())

inner_out = paddle.layer.recurrent_group(
name="__inner_%s__" % name,
step=lstm_inner_step,
reverse=reverse,
input=[lstm_group_input, hidden_dim, reverse, name])

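        # The sequence-end output below reuses the name of outer_memory
        # ("__inner_%s_last__"), so PaddlePaddle feeds it back as that
        # memory's value at the next outer (sentence-level) step.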
if reverse:
inner_last_output = paddle.layer.first_seq(
input=inner_out,
name="__inner_%s_last__" % name,
agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
else:
inner_last_output = paddle.layer.last_seq(
input=inner_out,
name="__inner_%s_last__" % name,
agg_level=paddle.layer.AggregateLevel.TO_NO_SEQUENCE)
return inner_out

return paddle.layer.recurrent_group(
input=[
paddle.layer.SubsequenceInput(input_layer), hidden_dim, reverse,
name
],
step=lstm_outer_step,
name="__outter_%s__" % name,
reverse=reverse)
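

# ---------------------------------------------------------------------------
# Example usage (an illustrative sketch only, not part of the original
# module). A paragraph is fed in as a nested sequence (sub-sequences of word
# ids); the data-layer name, vocabulary size, and dimensions are hypothetical.
def _demo_lstm_by_nested_sequence(vocab_size=10000, embed_dim=128):
    paragraph = paddle.layer.data(
        name="demo_paragraph",
        type=paddle.data_type.integer_value_sub_sequence(vocab_size))
    embedded = paddle.layer.embedding(input=paragraph, size=embed_dim)
    # Encodes the paragraph sentence by sentence, carrying the final hidden
    # state of each sentence over as the initial memory of the next one.
    return lstm_by_nested_sequence(
        input_layer=embedded,
        hidden_dim=256,
        name="demo_nested",
        reverse=False)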


def stacked_bidirectional_lstm_by_nested_seq(input_layer,
depth,
hidden_dim,
prefix=""):
""" The stacked bi-directional LSTM to process a nested sequence.

The modules defined in this function is exactly equivalent to
that defined in stacked_bidirectional_lstm, the only difference is the
bi-directional LSTM defined in this function implemented by recurrent_group
in PaddlePaddle, and receive a nested sequence as its input.

Arguments:
- inputs: The input layer to the bi-directional LSTM.
- hidden_dim: The dimension of the hidden state of the LSTM.
- depth: Depth of the stacked bi-directional LSTM.
- prefix: A string which will be appended to name of each layer
created in this function. Each layer in a network should
has a unique name. The prefix makes this fucntion can be
called multiple times.
"""

lstm_final_outs = []
for dirt in ["fwd", "bwd"]:
for i in range(depth):
lstm_out = lstm_by_nested_sequence(
input_layer=(lstm_out if i else input_layer),
hidden_dim=hidden_dim,
name="__%s_%s_%02d__" % (prefix, dirt, i),
reverse=(dirt == "bwd"))
lstm_final_outs.append(lstm_out)
return paddle.layer.concat(input=lstm_final_outs)
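

# ---------------------------------------------------------------------------
# Example usage (an illustrative sketch only, not part of the original
# module). The same kind of nested-sequence input as above is encoded by the
# stacked bi-directional LSTM built from recurrent_group; names and sizes are
# hypothetical placeholders.
def _demo_stacked_bidirectional_lstm_by_nested_seq(vocab_size=10000,
                                                   embed_dim=128):
    paragraph = paddle.layer.data(
        name="demo_nested_paragraph",
        type=paddle.data_type.integer_value_sub_sequence(vocab_size))
    embedded = paddle.layer.embedding(input=paragraph, size=embed_dim)
    return stacked_bidirectional_lstm_by_nested_seq(
        input_layer=embedded,
        depth=2,
        hidden_dim=256,
        prefix="demo_nested_stack")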