
Commit 581cf90

Merge pull request #2865 from lcy-seso/add_gated_unit_layer

add configuration helper for the gated unit.

2 parents 58f3de9 + e2fd06c · commit 581cf90

File tree: 5 files changed, +223 -2 lines changed


doc/api/v2/config/layer.rst

Lines changed: 5 additions & 0 deletions
@@ -474,6 +474,11 @@ prelu
 .. autoclass:: paddle.v2.layer.prelu
     :noindex:
 
+gated_unit
+-----------
+.. autoclass:: paddle.v2.layer.gated_unit
+    :noindex:
+
 Detection output Layer
 ======================
 

python/paddle/trainer_config_helpers/layers.py

Lines changed: 95 additions & 1 deletion
@@ -126,6 +126,7 @@
     'row_conv_layer',
     'dropout_layer',
     'prelu_layer',
+    'gated_unit_layer',
 ]
 
 
@@ -5862,7 +5863,7 @@ def prelu_layer(input,
     :rtype: LayerOutput
     """
 
-    assert isinstance(input, LayerOutput), 'prelu_layer only accepts one input'
+    assert isinstance(input, LayerOutput), 'prelu_layer accepts only one input.'
     assert isinstance(param_attr, ParameterAttribute)
 
     l = Layer(
@@ -5876,3 +5877,96 @@ def prelu_layer(input,
         layer_type=LayerType.PRELU,
         parents=input,
         size=l.config.size)
+
+
+@wrap_name_default()
+@layer_support(ERROR_CLIPPING, DROPOUT)
+@wrap_act_default(act=LinearActivation())
+def gated_unit_layer(input,
+                     size,
+                     act=None,
+                     name=None,
+                     gate_attr=None,
+                     gate_param_attr=None,
+                     gate_bias_attr=True,
+                     inproj_attr=None,
+                     inproj_param_attr=None,
+                     inproj_bias_attr=True,
+                     layer_attr=None):
+    """
+    The gated unit layer implements a simple gating mechanism over the input.
+    The input :math:`X` is first projected into a new space :math:`X'`, and
+    it is also used to produce a gate weight :math:`\sigma`. The element-wise
+    product of :math:`X'` and :math:`\sigma` is finally returned.
+
+    Reference:
+        Language Modeling with Gated Convolutional Networks
+        https://arxiv.org/abs/1612.08083
+
+    .. math::
+        y = \\text{act}(X \cdot W + b) \otimes \sigma(X \cdot V + c)
+
+    The example usage is:
+
+    .. code-block:: python
+
+        gated_unit = gated_unit_layer(size=128, input=input_layer)
+
+    :param input: input for this layer.
+    :type input: LayerOutput
+    :param size: output size of the gated unit.
+    :type size: int
+    :param act: activation type of the projected input.
+    :type act: BaseActivation
+    :param name: name of this layer.
+    :type name: basestring
+    :param gate_attr: Attributes to tune the gate output, for example, error
+        clipping threshold, dropout and so on. See ExtraLayerAttribute for
+        more details.
+    :type gate_attr: ExtraLayerAttribute|None
+    :param gate_param_attr: Attributes to tune the learnable projection matrix
+        parameter of the gate.
+    :type gate_param_attr: ParameterAttribute|None
+    :param gate_bias_attr: Attributes to tune the learnable bias of the gate.
+    :type gate_bias_attr: ParameterAttribute|None
+    :param inproj_attr: Attributes to tune the projected input, for example,
+        error clipping threshold, dropout and so on. See ExtraLayerAttribute
+        for more details.
+    :type inproj_attr: ExtraLayerAttribute|None
+    :param inproj_param_attr: Attributes to tune the learnable parameter of
+        the projection of the input.
+    :type inproj_param_attr: ParameterAttribute|None
+    :param inproj_bias_attr: Attributes to tune the learnable bias of the
+        projection of the input.
+    :type inproj_bias_attr: ParameterAttribute|None
+    :param layer_attr: Attributes to tune the final output of the gated unit,
+        for example, error clipping threshold, dropout and so on. See
+        ExtraLayerAttribute for more details.
+    :type layer_attr: ExtraLayerAttribute|None
+    :return: LayerOutput object.
+    :rtype: LayerOutput
+    """
+
+    assert isinstance(
+        input, LayerOutput), 'The gated linear unit accepts only one input.'
+
+    input_proj = fc_layer(
+        input=input,
+        name="%s_input_proj" % name,
+        size=size,
+        act=act,
+        layer_attr=inproj_attr,
+        param_attr=inproj_param_attr,
+        bias_attr=inproj_bias_attr)
+
+    gate = fc_layer(
+        size=size,
+        name="%s_gate" % name,
+        act=SigmoidActivation(),
+        input=input,
+        layer_attr=gate_attr,
+        param_attr=gate_param_attr,
+        bias_attr=gate_bias_attr)
+    return mixed_layer(
+        name="%s_gated_act" % name,
+        input=dotmul_operator(input_proj, gate),
+        layer_attr=layer_attr)
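
The docstring's formula, y = act(X·W + b) ⊗ σ(X·V + c), can be checked numerically. Below is a minimal NumPy sketch of the same computation; the batch shape, the tanh activation, and all variable names are illustrative assumptions, not part of the diff:

import numpy as np

def gated_unit(x, W, b, V, c):
    """y = act(x @ W + b) * sigmoid(x @ V + c), element-wise."""
    proj = np.tanh(x @ W + b)                  # projected input X'
    gate = 1.0 / (1.0 + np.exp(-(x @ V + c)))  # gate weights sigma
    return proj * gate                         # element-wise product

rng = np.random.default_rng(0)
x = rng.standard_normal((4, 256))            # a batch of 4 input rows
W = rng.standard_normal((256, 512)) * 1e-4   # projection weights
V = rng.standard_normal((256, 512)) * 1e-4   # gate weights
b = np.zeros(512)                            # projection bias
c = np.zeros(512)                            # gate bias
print(gated_unit(x, W, b, V, c).shape)       # -> (4, 512)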

python/paddle/trainer_config_helpers/tests/configs/file_list.sh

Lines changed: 1 addition & 1 deletion
@@ -7,6 +7,6 @@ test_rnn_group shared_fc shared_lstm shared_gru test_cost_layers_with_weight
 test_spp_layer test_bilinear_interp test_maxout test_bi_grumemory math_ops
 test_seq_concat_reshape test_pad test_smooth_l1 test_multiplex_layer
 test_prelu_layer test_row_conv test_detection_output_layer test_multibox_loss_layer
-test_recursive_topology)
+test_recursive_topology test_gated_unit_layer)
 
 export whole_configs=(test_split_datasource)
python/paddle/trainer_config_helpers/tests/configs/protostr/test_gated_unit_layer.protostr

Lines changed: 106 additions & 0 deletions
@@ -0,0 +1,106 @@
+type: "nn"
+layers {
+  name: "input"
+  type: "data"
+  size: 256
+  active_type: ""
+}
+layers {
+  name: "__gated_unit_layer_0___input_proj"
+  type: "fc"
+  size: 512
+  active_type: "tanh"
+  inputs {
+    input_layer_name: "input"
+    input_parameter_name: "___gated_unit_layer_0___input_proj.w0"
+  }
+  bias_parameter_name: "___gated_unit_layer_0___input_proj.wbias"
+  error_clipping_threshold: 100.0
+}
+layers {
+  name: "__gated_unit_layer_0___gate"
+  type: "fc"
+  size: 512
+  active_type: "sigmoid"
+  inputs {
+    input_layer_name: "input"
+    input_parameter_name: "___gated_unit_layer_0___gate.w0"
+  }
+  bias_parameter_name: "___gated_unit_layer_0___gate.wbias"
+  error_clipping_threshold: 100.0
+}
+layers {
+  name: "__gated_unit_layer_0___gated_act"
+  type: "mixed"
+  size: 512
+  active_type: ""
+  inputs {
+    input_layer_name: "__gated_unit_layer_0___input_proj"
+  }
+  inputs {
+    input_layer_name: "__gated_unit_layer_0___gate"
+  }
+  error_clipping_threshold: 100.0
+  operator_confs {
+    type: "dot_mul"
+    input_indices: 0
+    input_indices: 1
+    input_sizes: 512
+    input_sizes: 512
+    output_size: 512
+    dotmul_scale: 1
+  }
+}
+parameters {
+  name: "___gated_unit_layer_0___input_proj.w0"
+  size: 131072
+  initial_mean: 0.0
+  initial_std: 0.0001
+  dims: 256
+  dims: 512
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___gated_unit_layer_0___input_proj.wbias"
+  size: 512
+  initial_mean: 0.0
+  initial_std: 1
+  dims: 1
+  dims: 512
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___gated_unit_layer_0___gate.w0"
+  size: 131072
+  initial_mean: 0.0
+  initial_std: 0.0001
+  dims: 256
+  dims: 512
+  initial_strategy: 0
+  initial_smart: false
+}
+parameters {
+  name: "___gated_unit_layer_0___gate.wbias"
+  size: 512
+  initial_mean: 0.0
+  initial_std: 1
+  dims: 1
+  dims: 512
+  initial_strategy: 0
+  initial_smart: false
+}
+input_layer_names: "input"
+output_layer_names: "__gated_unit_layer_0___gated_act"
+sub_models {
+  name: "root"
+  layer_names: "input"
+  layer_names: "__gated_unit_layer_0___input_proj"
+  layer_names: "__gated_unit_layer_0___gate"
+  layer_names: "__gated_unit_layer_0___gated_act"
+  input_layer_names: "input"
+  output_layer_names: "__gated_unit_layer_0___gated_act"
+  is_recurrent_layer_group: false
+}
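
The parameter sizes in this generated config follow directly from the test's dimensions: both projections map the 256-dimensional input to 512 outputs, so each weight matrix (w0) holds 256 × 512 = 131072 entries and each bias (wbias) 1 × 512 = 512. A quick sanity check:

in_dim, out_dim = 256, 512
assert in_dim * out_dim == 131072  # w0 size of input_proj and of gate
assert 1 * out_dim == 512          # wbias size of each projection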
python/paddle/trainer_config_helpers/tests/configs/test_gated_unit_layer.py

Lines changed: 16 additions & 0 deletions
@@ -0,0 +1,16 @@
+from paddle.trainer_config_helpers import *
+
+data = data_layer(name='input', size=256)
+glu = gated_unit_layer(
+    size=512,
+    input=data,
+    act=TanhActivation(),
+    gate_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
+    gate_param_attr=ParamAttr(initial_std=1e-4),
+    gate_bias_attr=ParamAttr(initial_std=1),
+    inproj_attr=ExtraLayerAttribute(error_clipping_threshold=100.0),
+    inproj_param_attr=ParamAttr(initial_std=1e-4),
+    inproj_bias_attr=ParamAttr(initial_std=1),
+    layer_attr=ExtraLayerAttribute(error_clipping_threshold=100.0))
+
+outputs(glu)
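
For comparison, the same topology could be spelled out without the new helper, using the primitives gated_unit_layer composes internally (fc_layer, dotmul_operator, mixed_layer). A sketch with illustrative layer names and the attribute arguments omitted for brevity:

from paddle.trainer_config_helpers import *

data = data_layer(name='input', size=256)

# projected input: act(X . W + b)
input_proj = fc_layer(
    input=data, size=512, act=TanhActivation(), name='glu_input_proj')

# gate: sigmoid(X . V + c)
gate = fc_layer(
    input=data, size=512, act=SigmoidActivation(), name='glu_gate')

# element-wise product of projection and gate
glu = mixed_layer(
    input=dotmul_operator(input_proj, gate), name='glu_gated_act')

outputs(glu)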
