Skip to content
Closed
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions regression/regression.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
import paddle.v2 as paddle
import sys


def regression_net(input1_dict_dim, input2_dict_dim, is_generating=False):
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

is_generating is not used in the configuration, so it can be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

### Network Architecture
word_vector_dim = 512
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

word_vector_dim is not used in the configuration, so it can be removed.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

word_emb_dim = 512
hidden_dim = 512

#input2 architecture
input2 = paddle.layer.data(
name='input2_word',
type=paddle.data_type.integer_value_sequence(input2_dict_dim))

with paddle.layer.mixed(size=word_emb_dim, bias_attr=False) as input2_emb:
input2_emb += paddle.layer.table_projection(
input=input2,
param_attr=paddle.attr.Param(name='_emb_basic', is_static=True))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

line 16 to 19, please use paddle.layer.embedding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


input2_vec = paddle.layer.pooling(
input=input2_emb,
pooling_type=paddle.pooling.Sum(),
#act=paddle.activation.Tanh(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please remove the comment if it is useless.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

bias_attr=paddle.attr.ParameterAttribute(
name='_avg.bias_basic', is_static=True))

hidden_input2 = paddle.layer.fc(
input=input2_vec,
size=hidden_dim,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.Param(name='_hidden_input2.w', is_static=True),
bias_attr=paddle.attr.ParameterAttribute(
name='_hidden_input2.bias', is_static=True))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Please make lines 5 ~ 34 a function and make is_static a parameter, because the same logic is repeated in lines 37 ~ 59.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


#input1 architecture
input1 = paddle.layer.data(
name='input1_word',
type=paddle.data_type.integer_value_sequence(input1_dict_dim))

with paddle.layer.mixed(size=word_emb_dim, bias_attr=False) as input1_emb:
input1_emb += paddle.layer.table_projection(
input=input1,
param_attr=paddle.attr.Param(name='emb_input1', initial_std=0.02))
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

line 36 ~ 44, please use paddle.layer.embedding.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


input1_vec = paddle.layer.pooling(
input=input1_emb,
pooling_type=paddle.pooling.Sum(),
#act=paddle.activation.Tanh(),
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

please remove the useless comment.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

bias_attr=paddle.attr.ParameterAttribute(
name='_avg.bias_input1', initial_std=0.01))

hidden_input1 = paddle.layer.fc(
input=input1_vec,
size=hidden_dim,
act=paddle.activation.Tanh(),
param_attr=paddle.attr.ParameterAttribute(
name='_hidden_input1.w', initial_std=0.03),
bias_attr=paddle.attr.ParameterAttribute(name='_hidden_input1.bias'))

cost = paddle.layer.mse_cost(input=hidden_input1, label=hidden_input2)
#cost = paddle.layer.huber_cost(input=hidden_input1, label=hidden_input2)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Remove line 62. Different costs should be chosen by a user-defined parameter, or by creating another function.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

return cost


def main():
paddle.init(use_gpu=False, trainer_count=1)
is_generating = False

#input1 and input2 dict dim
dict_size = 30000
input1_dict_dim = input2_dict_dim = dict_size

#train the network
if not is_generating:
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This example does not need to generate anything. is_generating is not appropriate.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done


cost = regression_net(input1_dict_dim, input2_dict_dim)
parameters = paddle.parameters.create(cost)

# define optimize method and trainer
optimizer = paddle.optimizer.Adam(
learning_rate=5e-5,
regularization=paddle.optimizer.L2Regularization(rate=8e-4))

trainer = paddle.trainer.SGD(
cost=cost, parameters=parameters, update_equation=optimizer)

# define data reader
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If the goal is only to test whether the configuration can run, it is no problem to use the WMT14 machine translation dataset, but it is inappropriate to use it in the examples shown to users, because such inputs may not be reasonable. I suggest providing a few example data samples.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

done

wmt14_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.wmt14.train(dict_size), buf_size=8192),
batch_size=5)

# define event_handler callback
def event_handler(event):
    """Report training progress for events emitted by ``trainer.train``.

    At the end of every 100th batch the pass id, batch id, cost and
    metrics are printed.  At the end of each pass the model is evaluated
    with ``trainer.test`` on the WMT14 test reader and the test cost is
    printed.  Any other event type is ignored.

    Args:
        event: the event object handed to the callback by the trainer;
            only ``paddle.event.EndIteration`` and
            ``paddle.event.EndPass`` instances are acted upon.
    """
    if isinstance(event, paddle.event.EndIteration):
        if event.batch_id % 100 == 0:
            # The format string has four placeholders; the original
            # supplied only three values, which raises
            # "TypeError: not enough arguments for format string" on the
            # first logged batch.  ``event.metrics`` fills the trailing %s.
            print("\nPass %d, Batch %d, Cost %f, %s" %
                  (event.pass_id, event.batch_id, event.cost,
                   event.metrics))
    if isinstance(event, paddle.event.EndPass):
        # Evaluate on the held-out set once per pass.
        test_reader = paddle.batch(
            paddle.dataset.wmt14.test(dict_size), batch_size=2)
        result = trainer.test(reader=test_reader)
        print("Test %d, Cost %f" % (event.pass_id, result.cost))

# start to train
trainer.train(
reader=wmt14_reader, event_handler=event_handler, num_passes=2)


if __name__ == '__main__':
main()