
Commit 0fe70a7

ArnoldLIULJ authored and zsdonghao committed
Seq2seq (#989)
* add seq2seq model; add seq2seq test
* Revert "add seq2seq model; add seq2seq test". This reverts commit 6401d54.
* UNDO last commit
* Print list instead of tensor
* Add comments
* ADD batch testing at Inference
* ADD batch testing at Inference
* FIX typo and ADD some comments
* FIX the Travis CI build
* Delete unnecessary files
* Delete unnecessary files
* Delete unnecessary files
1 parent f26e4ba commit 0fe70a7

File tree

tensorlayer/models/seq2seq.py
tests/models/test_seq2seq_model.py

2 files changed: +256 -0 lines changed


tensorlayer/models/seq2seq.py

Lines changed: 160 additions & 0 deletions
@@ -0,0 +1,160 @@
#! /usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf
import tensorlayer as tl
import numpy as np
from tensorlayer.models import Model
from tensorlayer.layers import Dense, Dropout, Input
from tensorlayer.layers.core import Layer

class Seq2seq(Model):
    """Vanilla stacked-layer Seq2Seq model.

    Parameters
    ----------
    decoder_seq_length : int
        The length of your target sequence.
    cell_enc : str, tf.function
        The RNN cell for your encoder stack, e.g. tf.keras.layers.GRUCell.
    cell_dec : str, tf.function
        The RNN cell for your decoder stack, e.g. tf.keras.layers.GRUCell.
    n_units : int
        The number of hidden units in each RNN cell of the encoder and decoder stacks.
    n_layer : int
        The number of RNN layers in both the encoder and decoder blocks.
    embedding_layer : tl.Layer
        An embedding layer, e.g. tl.layers.Embedding(vocabulary_size=voc_size, embedding_size=emb_dim).
    name : str
        The model name.

    Examples
    ---------
    A stacked-layer Seq2Seq model, see `chatbot <https://github.com/tensorlayer/seq2seq-chatbot>`__.

    Returns
    -------
    A static stacked-layer Seq2Seq model.
    """

    def __init__(self, decoder_seq_length, cell_enc, cell_dec, n_units=256, n_layer=3, embedding_layer=None, name=None):
        super(Seq2seq, self).__init__(name=name)
        self.embedding_layer = embedding_layer
        self.vocabulary_size = embedding_layer.vocabulary_size
        self.embedding_size = embedding_layer.embedding_size
        self.n_layer = n_layer
        self.enc_layers = []
        self.dec_layers = []
        for i in range(n_layer):
            if (i == 0):
                self.enc_layers.append(
                    tl.layers.RNN(
                        cell=cell_enc(units=n_units), in_channels=self.embedding_size, return_last_state=True
                    )
                )
            else:
                self.enc_layers.append(
                    tl.layers.RNN(cell=cell_enc(units=n_units), in_channels=n_units, return_last_state=True)
                )

        for i in range(n_layer):
            if (i == 0):
                self.dec_layers.append(
                    tl.layers.RNN(
                        cell=cell_dec(units=n_units), in_channels=self.embedding_size, return_last_state=True
                    )
                )
            else:
                self.dec_layers.append(
                    tl.layers.RNN(cell=cell_dec(units=n_units), in_channels=n_units, return_last_state=True)
                )

        self.reshape_layer = tl.layers.Reshape([-1, n_units])
        self.dense_layer = tl.layers.Dense(n_units=self.vocabulary_size, in_channels=n_units)
        self.reshape_layer_after = tl.layers.Reshape([-1, decoder_seq_length, self.vocabulary_size])
        self.reshape_layer_individual_sequence = tl.layers.Reshape([-1, 1, self.vocabulary_size])

    def inference(self, encoding, seq_length, start_token, top_n):
        """Inference mode.

        Parameters
        ----------
        encoding : input tensor
            The source sequences.
        seq_length : int
            The expected length of your predicted sequence.
        start_token : int
            The <SOS> token marking the start of a sequence.
        top_n : int
            Sample the next token from the top_n most probable words; if None, greedy argmax decoding is used.
        """
        # encode the source sequence, keeping the last state of every encoder layer
        feed_output = self.embedding_layer(encoding[0])
        state = [None for i in range(self.n_layer)]

        for i in range(self.n_layer):
            feed_output, state[i] = self.enc_layers[i](feed_output, return_state=True)
        # first decoding step: feed the <SOS> token, initialised with the encoder states
        batch_size = len(encoding[0].numpy())
        decoding = [[start_token] for i in range(batch_size)]
        feed_output = self.embedding_layer(decoding)
        for i in range(self.n_layer):
            feed_output, state[i] = self.dec_layers[i](feed_output, initial_state=state[i], return_state=True)

        feed_output = self.reshape_layer(feed_output)
        feed_output = self.dense_layer(feed_output)
        feed_output = self.reshape_layer_individual_sequence(feed_output)
        feed_output = tf.argmax(feed_output, -1)
        # [B, 1]
        final_output = feed_output

        # autoregressive decoding for the remaining seq_length - 1 steps
        for i in range(seq_length - 1):
            feed_output = self.embedding_layer(feed_output)
            for i in range(self.n_layer):
                feed_output, state[i] = self.dec_layers[i](feed_output, initial_state=state[i], return_state=True)
            feed_output = self.reshape_layer(feed_output)
            feed_output = self.dense_layer(feed_output)
            feed_output = self.reshape_layer_individual_sequence(feed_output)
            ori_feed_output = feed_output
            if (top_n is not None):
                # sample the next token from the top_n most probable words, per batch element
                for k in range(batch_size):
                    idx = np.argpartition(ori_feed_output[k][0], -top_n)[-top_n:]
                    probs = [ori_feed_output[k][0][i] for i in idx]
                    probs = probs / np.sum(probs)
                    feed_output = np.random.choice(idx, p=probs)
                    feed_output = tf.convert_to_tensor([[feed_output]], dtype=tf.int64)
                    if (k == 0):
                        final_output_temp = feed_output
                    else:
                        final_output_temp = tf.concat([final_output_temp, feed_output], 0)
                feed_output = final_output_temp
            else:
                # greedy decoding
                feed_output = tf.argmax(feed_output, -1)
            final_output = tf.concat([final_output, feed_output], 1)

        return final_output, state

    def forward(self, inputs, seq_length=20, start_token=None, return_state=False, top_n=None):

        state = [None for i in range(self.n_layer)]
        if (self.is_train):
            # training mode: teacher forcing with inputs = [source_seqs, decoder_input_seqs]
            encoding = inputs[0]
            enc_output = self.embedding_layer(encoding)

            for i in range(self.n_layer):
                enc_output, state[i] = self.enc_layers[i](enc_output, return_state=True)

            decoding = inputs[1]
            dec_output = self.embedding_layer(decoding)

            for i in range(self.n_layer):
                dec_output, state[i] = self.dec_layers[i](dec_output, initial_state=state[i], return_state=True)

            dec_output = self.reshape_layer(dec_output)
            denser_output = self.dense_layer(dec_output)
            output = self.reshape_layer_after(denser_output)
        else:
            # inference mode: autoregressive decoding from the start token
            encoding = inputs
            output, state = self.inference(encoding, seq_length, start_token, top_n)

        if (return_state):
            return output, state
        else:
            return output
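
For context, here is a minimal usage sketch of the model above, mirroring how the test file below constructs and calls it. It is not part of the commit; the toy sizes, batch shapes, and variable names are assumed for illustration only.

# Illustrative usage sketch (not part of the commit): build the Seq2seq model,
# run one teacher-forced forward pass, then decode autoregressively in eval mode.
import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tensorlayer.models.seq2seq import Seq2seq

vocab_size, emb_dim, dec_len = 100, 32, 10  # assumed toy sizes
model = Seq2seq(
    decoder_seq_length=dec_len,
    cell_enc=tf.keras.layers.GRUCell,
    cell_dec=tf.keras.layers.GRUCell,
    n_units=128,
    n_layer=3,
    embedding_layer=tl.layers.Embedding(vocabulary_size=vocab_size, embedding_size=emb_dim),
)

# training mode: inputs = [source_batch, decoder_input_batch] (teacher forcing)
model.train()
src = np.random.randint(vocab_size, size=(8, 12))           # source token ids
dec_in = np.random.randint(vocab_size, size=(8, dec_len))   # shifted target ids
logits = model(inputs=[src, dec_in])                         # shape [8, dec_len, vocab_size]

# inference mode: decode from start token 0, greedily (top_n=1)
model.eval()
prediction = model([src.tolist()], seq_length=dec_len, start_token=0, top_n=1)
print(prediction)                                            # token ids, shape [8, dec_len]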

tests/models/test_seq2seq_model.py

Lines changed: 96 additions & 0 deletions
@@ -0,0 +1,96 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-

import os
import unittest

os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'

import numpy as np
import tensorflow as tf
import tensorlayer as tl
from tqdm import tqdm
from sklearn.utils import shuffle
from tensorlayer.models.seq2seq import Seq2seq
from tests.utils import CustomTestCase
from tensorlayer.cost import cross_entropy_seq

class Model_SEQ2SEQ_Test(CustomTestCase):

    @classmethod
    def setUpClass(cls):

        cls.batch_size = 16

        cls.vocab_size = 20
        cls.embedding_size = 32
        cls.dec_seq_length = 5
        cls.trainX = np.random.randint(20, size=(50, 6))
        cls.trainY = np.random.randint(20, size=(50, cls.dec_seq_length + 1))
        cls.trainY[:, 0] = 0  # start_token == 0

        # Parameters
        cls.src_len = len(cls.trainX)
        cls.tgt_len = len(cls.trainY)

        assert cls.src_len == cls.tgt_len

        cls.num_epochs = 100
        cls.n_step = cls.src_len // cls.batch_size

    @classmethod
    def tearDownClass(cls):
        pass

    def test_basic_simpleSeq2Seq(self):
        model_ = Seq2seq(
            decoder_seq_length=5,
            cell_enc=tf.keras.layers.GRUCell,
            cell_dec=tf.keras.layers.GRUCell,
            n_layer=3,
            n_units=128,
            embedding_layer=tl.layers.Embedding(vocabulary_size=self.vocab_size, embedding_size=self.embedding_size),
        )

        optimizer = tf.optimizers.Adam(learning_rate=0.001)

        for epoch in range(self.num_epochs):
            model_.train()
            trainX, trainY = shuffle(self.trainX, self.trainY)
            total_loss, n_iter = 0, 0
            for X, Y in tqdm(tl.iterate.minibatches(inputs=trainX, targets=trainY, batch_size=self.batch_size,
                                                    shuffle=False), total=self.n_step,
                             desc='Epoch[{}/{}]'.format(epoch + 1, self.num_epochs), leave=False):

                dec_seq = Y[:, :-1]
                target_seq = Y[:, 1:]

                with tf.GradientTape() as tape:
                    # compute outputs
                    output = model_(inputs=[X, dec_seq])

                    output = tf.reshape(output, [-1, self.vocab_size])

                    loss = cross_entropy_seq(logits=output, target_seqs=target_seq)

                grad = tape.gradient(loss, model_.all_weights)
                optimizer.apply_gradients(zip(grad, model_.all_weights))

                total_loss += loss
                n_iter += 1

            model_.eval()
            test_sample = trainX[0:2, :].tolist()

            top_n = 1
            for i in range(top_n):
                prediction = model_([test_sample], seq_length=self.dec_seq_length, start_token=0, top_n=1)
                print("Prediction: >>>>> ", prediction, "\n Target: >>>>> ", trainY[0:2, 1:], "\n\n")

            # printing average loss after every epoch
            print('Epoch [{}/{}]: loss {:.4f}'.format(epoch + 1, self.num_epochs, total_loss / n_iter))


if __name__ == '__main__':
    unittest.main()
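
As a side note, the top_n sampling that Seq2seq.inference performs for each batch element boils down to the following standalone NumPy sketch. It is illustrative only, not part of the commit, and sample_top_n and the example scores are hypothetical.

# Standalone sketch of top_n sampling: keep the top_n highest-scoring token ids,
# re-normalize their scores, and draw one id according to those probabilities.
import numpy as np

def sample_top_n(scores, top_n):
    idx = np.argpartition(scores, -top_n)[-top_n:]   # indices of the top_n largest scores
    probs = scores[idx] / np.sum(scores[idx])        # re-normalize over the kept ids
    return int(np.random.choice(idx, p=probs))

scores = np.array([0.05, 0.40, 0.10, 0.30, 0.15])    # assumed per-token scores
print(sample_top_n(scores, top_n=2))                 # prints 1 or 3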
