#! /usr/bin/python
# -*- coding: utf-8 -*-

import tensorflow as tf
import tensorlayer as tl
import numpy as np
from tensorlayer.models import Model
from tensorlayer.layers import Dense, Dropout, Input
from tensorlayer.layers.core import Layer
class Seq2seq(Model):
    """Vanilla stacked-layer Seq2Seq model.

    Parameters
    ----------
    decoder_seq_length : int
        The length of your target sequence (used to reshape the decoder
        output to [batch, decoder_seq_length, vocabulary_size]).
    cell_enc : str, tf.function
        The RNN function cell for your encoder stack, e.g tf.keras.layers.GRUCell
    cell_dec : str, tf.function
        The RNN function cell for your decoder stack, e.g. tf.keras.layers.GRUCell
    n_units : int
        The number of hidden units in every RNN layer.
    n_layer : int
        The number of your RNN layers for both encoder and decoder block
    embedding_layer : tl.Layer
        A embedding layer, e.g. tl.layers.Embedding(vocabulary_size=voc_size, embedding_size=emb_dim)
    name : str
        The model name

    Examples
    ---------
    Classify stacked-layer Seq2Seq model, see `chatbot <https://github.com/tensorlayer/seq2seq-chatbot>`__

    Returns
    -------
    static stacked-layer Seq2Seq model.
    """

    def __init__(self, decoder_seq_length, cell_enc, cell_dec, n_units=256, n_layer=3, embedding_layer=None, name=None):
        super(Seq2seq, self).__init__(name=name)
        self.embedding_layer = embedding_layer
        self.vocabulary_size = embedding_layer.vocabulary_size
        self.embedding_size = embedding_layer.embedding_size
        self.n_layer = n_layer

        # The first RNN of each stack consumes embeddings; every deeper RNN
        # consumes the n_units-wide output of the layer below it.
        self.enc_layers = [
            tl.layers.RNN(
                cell=cell_enc(units=n_units),
                in_channels=self.embedding_size if i == 0 else n_units,
                return_last_state=True,
            ) for i in range(n_layer)
        ]
        self.dec_layers = [
            tl.layers.RNN(
                cell=cell_dec(units=n_units),
                in_channels=self.embedding_size if i == 0 else n_units,
                return_last_state=True,
            ) for i in range(n_layer)
        ]

        self.reshape_layer = tl.layers.Reshape([-1, n_units])
        self.dense_layer = tl.layers.Dense(n_units=self.vocabulary_size, in_channels=n_units)
        self.reshape_layer_after = tl.layers.Reshape([-1, decoder_seq_length, self.vocabulary_size])
        self.reshape_layer_individual_sequence = tl.layers.Reshape([-1, 1, self.vocabulary_size])

    def inference(self, encoding, seq_length, start_token, top_n):
        """Inference mode: decode autoregressively from the encoded source.

        Parameters
        ----------
        encoding : input tensor
            The source sequences
        seq_length : int
            The expected length of your predicted sequence.
        start_token : int
            <SOS> : The token of "start of sequence"
        top_n : int
            Random search algorithm based on the top top_n words sorted by the probablity.
            If None, greedy argmax decoding is used instead.
        """
        # Encode the source batch, keeping every layer's final state so the
        # decoder stack can be initialised from it.
        feed_output = self.embedding_layer(encoding[0])
        state = [None] * self.n_layer
        for layer_idx in range(self.n_layer):
            feed_output, state[layer_idx] = self.enc_layers[layer_idx](feed_output, return_state=True)

        batch_size = len(encoding[0].numpy())
        # First decoder input: one <SOS> token per batch element.
        decoding = [[start_token] for _ in range(batch_size)]
        feed_output = self.embedding_layer(decoding)
        for layer_idx in range(self.n_layer):
            feed_output, state[layer_idx] = self.dec_layers[layer_idx](
                feed_output, initial_state=state[layer_idx], return_state=True
            )

        feed_output = self.reshape_layer(feed_output)
        feed_output = self.dense_layer(feed_output)
        feed_output = self.reshape_layer_individual_sequence(feed_output)
        feed_output = tf.argmax(feed_output, -1)
        # [B, 1]
        final_output = feed_output

        # Autoregressive loop: feed each predicted token back as the next
        # decoder input. (Distinct loop names fix the original's reuse of
        # `i` for both the step loop and the layer loop.)
        for _step in range(seq_length - 1):
            feed_output = self.embedding_layer(feed_output)
            for layer_idx in range(self.n_layer):
                feed_output, state[layer_idx] = self.dec_layers[layer_idx](
                    feed_output, initial_state=state[layer_idx], return_state=True
                )
            feed_output = self.reshape_layer(feed_output)
            feed_output = self.dense_layer(feed_output)
            feed_output = self.reshape_layer_individual_sequence(feed_output)
            ori_feed_output = feed_output
            if top_n is not None:
                # Sample the next token from the top_n highest-scoring words.
                # NOTE(review): these are raw dense outputs, not softmax
                # probabilities -- a negative score would make
                # np.random.choice(p=...) fail; confirm scores are
                # non-negative or apply softmax upstream.
                for k in range(batch_size):
                    idx = np.argpartition(ori_feed_output[k][0], -top_n)[-top_n:]
                    probs = [ori_feed_output[k][0][j] for j in idx]
                    probs = probs / np.sum(probs)
                    feed_output = np.random.choice(idx, p=probs)
                    feed_output = tf.convert_to_tensor([[feed_output]], dtype=tf.int64)
                    if k == 0:
                        final_output_temp = feed_output
                    else:
                        final_output_temp = tf.concat([final_output_temp, feed_output], 0)
                feed_output = final_output_temp
            else:
                feed_output = tf.argmax(feed_output, -1)
            final_output = tf.concat([final_output, feed_output], 1)

        return final_output, state

    def forward(self, inputs, seq_length=20, start_token=None, return_state=False, top_n=None):
        """Run the model.

        In training mode `inputs` is a pair (source_batch, target_batch) and
        the output is per-step vocabulary scores of shape
        [batch, decoder_seq_length, vocabulary_size] (teacher forcing).
        In evaluation mode `inputs` is the source batch alone and tokens are
        generated autoregressively via `inference`.
        """
        state = [None] * self.n_layer
        if self.is_train:
            # Teacher-forced training path.
            encoding = inputs[0]
            enc_output = self.embedding_layer(encoding)
            for layer_idx in range(self.n_layer):
                enc_output, state[layer_idx] = self.enc_layers[layer_idx](enc_output, return_state=True)

            decoding = inputs[1]
            dec_output = self.embedding_layer(decoding)
            for layer_idx in range(self.n_layer):
                dec_output, state[layer_idx] = self.dec_layers[layer_idx](
                    dec_output, initial_state=state[layer_idx], return_state=True
                )

            dec_output = self.reshape_layer(dec_output)
            denser_output = self.dense_layer(dec_output)
            output = self.reshape_layer_after(denser_output)
        else:
            encoding = inputs
            output, state = self.inference(encoding, seq_length, start_token, top_n)

        if return_state:
            return output, state
        return output