-
Notifications
You must be signed in to change notification settings - Fork 2.9k
add regression demo config, fixes #10 #34
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from 2 commits
692ba92
19ba12f
27eeeaa
b54a56d
6621738
fec6c37
3786025
77c250a
529029b
69171f5
ac5adfa
43030fe
03cd2de
7bd8818
59532f6
956302c
2656669
da1d7e4
0e29a68
000bc3c
c503caa
f19eeb9
7a16729
65a5c90
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,111 @@ | ||
| import paddle.v2 as paddle | ||
| import sys | ||
|
|
||
|
|
||
def regression_net(input1_dict_dim, input2_dict_dim, is_generating=False):
    """Build the regression network and return its training cost layer.

    Each of the two word sequences is embedded, sum-pooled into a
    fixed-size vector, and projected through a Tanh fully-connected
    layer into a shared hidden space.  The input2 ("basic") branch uses
    static (frozen) parameters throughout, so it acts as a fixed target
    encoder; only the input1 branch is trained, to minimize the MSE
    between the two hidden vectors.

    Args:
        input1_dict_dim (int): vocabulary size of the input1 sequence.
        input2_dict_dim (int): vocabulary size of the input2 sequence.
        is_generating (bool): unused; kept only for interface
            compatibility with callers.  TODO: remove once callers
            are updated.

    Returns:
        The mse_cost layer between the two hidden projections.
    """
    ### Network Architecture
    word_emb_dim = 512  # embedding size shared by both branches
    hidden_dim = 512    # size of the shared hidden space

    # input2 architecture: every parameter is is_static=True, so this
    # branch is a frozen target encoder loaded from pre-trained values.
    input2 = paddle.layer.data(
        name='input2_word',
        type=paddle.data_type.integer_value_sequence(input2_dict_dim))

    with paddle.layer.mixed(size=word_emb_dim, bias_attr=False) as input2_emb:
        input2_emb += paddle.layer.table_projection(
            input=input2,
            param_attr=paddle.attr.Param(name='_emb_basic', is_static=True))

    input2_vec = paddle.layer.pooling(
        input=input2_emb,
        pooling_type=paddle.pooling.Sum(),
        bias_attr=paddle.attr.ParameterAttribute(
            name='_avg.bias_basic', is_static=True))

    hidden_input2 = paddle.layer.fc(
        input=input2_vec,
        size=hidden_dim,
        act=paddle.activation.Tanh(),
        param_attr=paddle.attr.Param(name='_hidden_input2.w', is_static=True),
        bias_attr=paddle.attr.ParameterAttribute(
            name='_hidden_input2.bias', is_static=True))

    # input1 architecture: mirrors the input2 branch but with trainable
    # parameters initialized from small-std Gaussians.
    input1 = paddle.layer.data(
        name='input1_word',
        type=paddle.data_type.integer_value_sequence(input1_dict_dim))

    with paddle.layer.mixed(size=word_emb_dim, bias_attr=False) as input1_emb:
        input1_emb += paddle.layer.table_projection(
            input=input1,
            param_attr=paddle.attr.Param(name='emb_input1', initial_std=0.02))

    input1_vec = paddle.layer.pooling(
        input=input1_emb,
        pooling_type=paddle.pooling.Sum(),
        bias_attr=paddle.attr.ParameterAttribute(
            name='_avg.bias_input1', initial_std=0.01))

    hidden_input1 = paddle.layer.fc(
        input=input1_vec,
        size=hidden_dim,
        act=paddle.activation.Tanh(),
        param_attr=paddle.attr.ParameterAttribute(
            name='_hidden_input1.w', initial_std=0.03),
        bias_attr=paddle.attr.ParameterAttribute(name='_hidden_input1.bias'))

    # Train input1's projection to match the frozen input2 projection.
    cost = paddle.layer.mse_cost(input=hidden_input1, label=hidden_input2)

    return cost
|
|
||
|
|
||
| def main(): | ||
| paddle.init(use_gpu=False, trainer_count=1) | ||
| is_generating = False | ||
|
|
||
| #input1 and input2 dict dim | ||
| dict_size = 30000 | ||
| input1_dict_dim = input2_dict_dim = dict_size | ||
|
|
||
| #train the network | ||
| if not is_generating: | ||
|
||
|
|
||
| cost = regression_net(input1_dict_dim, input2_dict_dim) | ||
| parameters = paddle.parameters.create(cost) | ||
|
|
||
| # define optimize method and trainer | ||
| optimizer = paddle.optimizer.Adam( | ||
| learning_rate=5e-5, | ||
| regularization=paddle.optimizer.L2Regularization(rate=8e-4)) | ||
|
|
||
| trainer = paddle.trainer.SGD( | ||
| cost=cost, parameters=parameters, update_equation=optimizer) | ||
|
|
||
| # define data reader | ||
|
||
| wmt14_reader = paddle.batch( | ||
| paddle.reader.shuffle( | ||
| paddle.dataset.wmt14.train(dict_size), buf_size=8192), | ||
| batch_size=5) | ||
|
|
||
| # define event_handler callback | ||
| def event_handler(event): | ||
| if isinstance(event, paddle.event.EndIteration): | ||
| if event.batch_id % 100 == 0: | ||
| print "\nPass %d, Batch %d, Cost %f, %s" % ( | ||
| event.pass_id, event.batch_id, event.cost) | ||
| if isinstance(event, paddle.event.EndPass): | ||
| result = trainer.test(reader=paddle.batch( | ||
| paddle.dataset.wmt14.test(dict_size), batch_size=2)) | ||
| print "Test %d, Cost %f" % (event.pass_id, result.cost) | ||
|
|
||
| # start to train | ||
| trainer.train( | ||
| reader=wmt14_reader, event_handler=event_handler, num_passes=2) | ||
|
|
||
|
|
||
# Script entry point: run training when invoked directly.
if __name__ == '__main__':
    main()
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
`is_generating` is not used in the configuration, so it can be removed.

There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
done