
Commit 567ba14

Add bias for gru_unit_op and fix activation function

1 parent: 7c194ac

File tree

  • python/paddle/fluid/layers/nn.py

1 file changed (+19, -22)

python/paddle/fluid/layers/nn.py

Lines changed: 19 additions & 22 deletions
@@ -727,8 +727,8 @@ def dynamic_gru(input,
 def gru_unit(input,
              hidden,
              size,
-             weight=None,
-             bias=None,
+             param_attr=None,
+             bias_attr=None,
              activation='tanh',
             gate_activation='sigmoid'):
     """
@@ -759,8 +759,8 @@ def gru_unit(input,
         input (Variable): The fc transformed input value of current step.
         hidden (Variable): The hidden value of lstm unit from previous step.
         size (integer): The input dimension value.
-        weight (ParamAttr): The weight parameters for gru unit. Default: None
-        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        param_attr (ParamAttr): The weight parameters for gru unit. Default: None
+        bias_attr (ParamAttr): The bias parameters for gru unit. Default: None
         activation (string): The activation type for cell (actNode).
                              Default: 'tanh'
         gate_activation (string): The activation type for gates (actGate).
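
For context, a minimal sketch of calling gru_unit after this rename. Variable names and sizes are hypothetical, and the snippet assumes the paddle.fluid API as it stood at this commit:

# Hypothetical usage sketch (names and sizes invented for illustration;
# assumes the paddle.fluid API at the time of this commit).
import paddle.fluid as fluid

hidden_dim = 128
# gru_unit expects the fc-transformed input of width 3 * hidden_dim,
# and divides `size` by 3 internally (see the next hunk).
step_in = fluid.layers.data(name='step_in',
                            shape=[hidden_dim * 3],
                            dtype='float32')
prev_hidden = fluid.layers.data(name='prev_hidden',
                                shape=[hidden_dim],
                                dtype='float32')

# Weight and bias are now configured through param_attr/bias_attr
# instead of being passed in as pre-created Variables.
updated_hidden, reset_hidden_pre, gate = fluid.layers.gru_unit(
    input=step_in,
    hidden=prev_hidden,
    size=hidden_dim * 3,
    param_attr=None,   # default attr for the [size, 3 * size] weight
    bias_attr=None)    # per the diff, Bias is attached only when
                       # helper.bias_attr is truthy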
@@ -792,34 +792,31 @@ def gru_unit(input,
     size = size / 3
 
     # create weight
-    if weight is None:
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
 
+    gate = helper.create_tmp_variable(dtype)
+    reset_hidden_pre = helper.create_tmp_variable(dtype)
+    updated_hidden = helper.create_tmp_variable(dtype)
+    inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight}
     # create bias
-
-    if bias is None:
+    if helper.bias_attr:
         bias_size = [1, 3 * size]
         bias = helper.create_parameter(
             attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
-
-    gate = helper.create_tmp_variable(dtype)
-    reset_hidden_pre = helper.create_tmp_variable(dtype)
-    updated_hidden = helper.create_tmp_variable(dtype)
+        inputs['Bias'] = bias
 
     helper.append_op(
         type='gru_unit',
-        inputs={'Input': input,
-                'HiddenPrev': hidden,
-                'Weight': weight},
+        inputs=inputs,
         outputs={
             'Gate': gate,
             'ResetHiddenPrev': reset_hidden_pre,
             'Hidden': updated_hidden,
         },
         attrs={
-            'activation': 0,
-            'gate_activation': 1,
+            'activation': 2,  # tanh
+            'gate_activation': 1,  # sigmoid
         })
 
     return updated_hidden, reset_hidden_pre, gate
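
Two things worth noting in this hunk. First, the Bias input is now attached to the op only when a bias parameter is actually created, by building the `inputs` dict incrementally. Second, the activation attrs are integer enums, not strings: the old code passed 0 for the cell activation, while the fix passes 2 (tanh) and 1 (sigmoid). A hedged sketch of the name-to-enum mapping this implies; the 'identity' and 'relu' values are assumptions based on the usual gru_unit_op enum ordering, not confirmed by this diff:

# Mapping implied by this commit ('sigmoid' -> 1, 'tanh' -> 2).
# 'identity' and 'relu' are assumed values, not confirmed by the diff.
GRU_ACTIVATION_DICT = {
    'identity': 0,  # assumed; would explain the old buggy value of 0
    'sigmoid': 1,   # confirmed by this commit
    'tanh': 2,      # confirmed by this commit
    'relu': 3,      # assumed
}

# With such a table, the still-hard-coded attrs could honor the
# function's `activation` and `gate_activation` string arguments:
#   attrs={
#       'activation': GRU_ACTIVATION_DICT[activation],
#       'gate_activation': GRU_ACTIVATION_DICT[gate_activation],
#   }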
The remaining two hunks in nn.py are whitespace-only cleanups: each removed/added pair appears identical because the lines evidently differ only in trailing whitespace.

@@ -3733,8 +3730,8 @@ def label_smooth(label,
                  name=None):
     """
     Label smoothing is a mechanism to regularize the classifier layer and is
-    called label-smoothing regularization (LSR).
-
+    called label-smoothing regularization (LSR).
+
     Label smoothing is proposed to encourage the model to be less confident,
     since optimizing the log-likelihood of the correct label directly may
     cause overfitting and reduce the ability of the model to adapt. Label
@@ -3758,10 +3755,10 @@ def label_smooth(label,
         prior_dist(Variable): The prior distribution to be used to smooth
                               labels. If not provided, an uniform distribution
                               is used. The shape of :attr:`prior_dist` should
-                              be :math:`(1, class\_num)`.
+                              be :math:`(1, class\_num)`.
         epsilon(float): The weight used to mix up the original ground-truth
                         distribution and the fixed distribution.
-        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
                                                   float_64, int etc.
         name(str|None): A name for this layer(optional). If set None, the layer
                         will be named automatically.
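
Since the label_smooth docstring above describes the LSR mechanism, here is a small self-contained numpy illustration of the formula it refers to (a reference sketch, not the fluid op itself): smoothed = (1 - epsilon) * label + epsilon * prior_dist, with a uniform prior of shape (1, class_num) by default.

# Numpy illustration of the LSR formula from the docstring above.
import numpy as np

def label_smooth_ref(label, epsilon=0.1, prior_dist=None):
    # label: one-hot array of shape (batch_size, class_num)
    class_num = label.shape[-1]
    if prior_dist is None:
        # default: uniform distribution over classes, shape (1, class_num)
        prior_dist = np.full((1, class_num), 1.0 / class_num)
    return (1.0 - epsilon) * label + epsilon * prior_dist

one_hot = np.eye(4)[[2]]          # one sample labeled class 2 of 4
print(label_smooth_ref(one_hot))  # [[0.025 0.025 0.925 0.025]]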
