python/paddle/fluid/layers/nn.py (41 changes: 19 additions & 22 deletions)
@@ -727,8 +727,8 @@ def dynamic_gru(input,
 def gru_unit(input,
              hidden,
              size,
-             weight=None,
-             bias=None,
+             param_attr=None,
+             bias_attr=None,
              activation='tanh',
              gate_activation='sigmoid'):
     """
@@ -759,8 +759,8 @@ def gru_unit(input,
         input (Variable): The fc transformed input value of current step.
         hidden (Variable): The hidden value of lstm unit from previous step.
         size (integer): The input dimension value.
-        weight (ParamAttr): The weight parameters for gru unit. Default: None
-        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        param_attr (ParamAttr): The weight parameters for gru unit. Default: None
+        bias_attr (ParamAttr): The bias parameters for gru unit. Default: None
         activation (string): The activation type for cell (actNode).
                              Default: 'tanh'
         gate_activation (string): The activation type for gates (actGate).
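
Reviewer note: after this rename, call sites pass ParamAttr objects under the new keyword names. A minimal usage sketch against the fluid API of this period; the parameter names, hidden_dim value, and data layers below are illustrative assumptions, not taken from this PR:

import paddle.fluid as fluid

hidden_dim = 512
# fc-transformed step input and previous hidden state (shapes per the docstring).
x_t = fluid.layers.data(name='x_t', shape=[3 * hidden_dim], dtype='float32')
prev_hidden = fluid.layers.data(
    name='prev_hidden', shape=[hidden_dim], dtype='float32')

updated_hidden, reset_hidden_pre, gate = fluid.layers.gru_unit(
    input=x_t,
    hidden=prev_hidden,
    size=3 * hidden_dim,  # gru_unit divides size by 3 internally
    param_attr=fluid.ParamAttr(name='gru_w'),  # formerly `weight`
    bias_attr=fluid.ParamAttr(name='gru_b'))   # formerly `bias`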
@@ -792,34 +792,31 @@ def gru_unit(input,
     size = size / 3

     # create weight
-    if weight is None:
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)

+    gate = helper.create_tmp_variable(dtype)
+    reset_hidden_pre = helper.create_tmp_variable(dtype)
+    updated_hidden = helper.create_tmp_variable(dtype)
+    inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight}
     # create bias
-
-    if bias is None:
+    if helper.bias_attr:
         bias_size = [1, 3 * size]
         bias = helper.create_parameter(
             attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
-
-    gate = helper.create_tmp_variable(dtype)
-    reset_hidden_pre = helper.create_tmp_variable(dtype)
-    updated_hidden = helper.create_tmp_variable(dtype)
+        inputs['Bias'] = bias

     helper.append_op(
         type='gru_unit',
-        inputs={'Input': input,
-                'HiddenPrev': hidden,
-                'Weight': weight},
+        inputs=inputs,
         outputs={
             'Gate': gate,
             'ResetHiddenPrev': reset_hidden_pre,
             'Hidden': updated_hidden,
         },
         attrs={
-            'activation': 0,
-            'gate_activation': 1,
+            'activation': 2,  # tanh
+            'gate_activation': 1,  # sigmoid
         })

     return updated_hidden, reset_hidden_pre, gate
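
Reviewer note: besides making Bias optional (it is attached to the op only when helper.bias_attr is set), the substantive fix here is in attrs: code 0 selected identity rather than the documented tanh default. Even after the fix, the wrapper hardcodes both codes and ignores its activation and gate_activation string arguments. A sketch of mapping the strings instead; codes 1 (sigmoid) and 2 (tanh) are confirmed by the comments in this diff, while identity=0 and relu=3 are assumptions about the op's enum ordering:

activation, gate_activation = 'tanh', 'sigmoid'  # the wrapper's string args

# Integer codes the gru_unit op expects for its activation attrs.
# identity=0 and relu=3 are assumed, not confirmed by this PR.
ACTIVATION_CODES = {'identity': 0, 'sigmoid': 1, 'tanh': 2, 'relu': 3}

attrs = {
    'activation': ACTIVATION_CODES[activation],            # 'tanh' -> 2
    'gate_activation': ACTIVATION_CODES[gate_activation],  # 'sigmoid' -> 1
}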
@@ -3733,8 +3730,8 @@ def label_smooth(label,
                  name=None):
     """
     Label smoothing is a mechanism to regularize the classifier layer and is
-    called label-smoothing regularization (LSR).
+    called label-smoothing regularization (LSR).

     Label smoothing is proposed to encourage the model to be less confident,
     since optimizing the log-likelihood of the correct label directly may
     cause overfitting and reduce the ability of the model to adapt. Label
@@ -3758,10 +3755,10 @@ def label_smooth(label,
         prior_dist(Variable): The prior distribution to be used to smooth
                               labels. If not provided, an uniform distribution
                               is used. The shape of :attr:`prior_dist` should
-                              be :math:`(1, class\_num)`.
+                              be :math:`(1, class\_num)`.
         epsilon(float): The weight used to mix up the original ground-truth
                         distribution and the fixed distribution.
-        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of data : float32,
                         float_64, int etc.
         name(str|None): A name for this layer(optional). If set None, the layer
                         will be named automatically.
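
Reviewer note: both label_smooth hunks appear to be trailing-whitespace cleanups; the removed and added lines are otherwise identical. For context, the layer computes the standard LSR mixture, smoothed = (1 - epsilon) * label + epsilon * prior, with a uniform prior by default. A minimal NumPy sketch of that formula; the function name and epsilon value are illustrative:

import numpy as np

def label_smooth_np(label, epsilon=0.1, prior_dist=None):
    # Mix the one-hot labels with a prior; uniform when prior_dist is None.
    class_num = label.shape[-1]
    if prior_dist is None:
        prior_dist = np.full((1, class_num), 1.0 / class_num)
    return (1.0 - epsilon) * label + epsilon * prior_dist

one_hot = np.array([[0.0, 1.0, 0.0]])
print(label_smooth_np(one_hot))  # [[0.0333... 0.9333... 0.0333...]]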