@@ -727,8 +727,8 @@ def dynamic_gru(input,
 def gru_unit(input,
              hidden,
              size,
-             weight=None,
-             bias=None,
+             param_attr=None,
+             bias_attr=None,
              activation='tanh',
              gate_activation='sigmoid'):
     """
@@ -759,8 +759,8 @@ def gru_unit(input,
         input (Variable): The fc transformed input value of current step.
         hidden (Variable): The hidden value of gru unit from previous step.
         size (integer): The input dimension value.
-        weight (ParamAttr): The weight parameters for gru unit. Default: None
-        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        param_attr (ParamAttr): The weight parameters for gru unit. Default: None
+        bias_attr (ParamAttr): The bias parameters for gru unit. Default: None
         activation (string): The activation type for cell (actNode).
                              Default: 'tanh'
         gate_activation (string): The activation type for gates (actGate).
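
For reference, the step these arguments configure is a single GRU update: `input` carries the already fc-transformed gate slices (hence the `size = size / 3` in the next hunk), and `activation`/`gate_activation` name actNode and actGate. One common formulation of that step, written here as illustration since gate conventions differ between implementations and this is not a statement of Paddle's exact kernel:

    u_t = actGate(x_u + U_u h_{t-1} + b_u)          (update gate)
    r_t = actGate(x_r + U_r h_{t-1} + b_r)          (reset gate)
    m_t = actNode(x_m + U_m (r_t * h_{t-1}) + b_m)  (candidate state)
    h_t = u_t * h_{t-1} + (1 - u_t) * m_t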
@@ -792,34 +792,31 @@ def gru_unit(input,
     size = size / 3
 
     # create weight
-    if weight is None:
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
 
+    gate = helper.create_tmp_variable(dtype)
+    reset_hidden_pre = helper.create_tmp_variable(dtype)
+    updated_hidden = helper.create_tmp_variable(dtype)
+    inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight}
     # create bias
-
-    if bias is None:
+    if helper.bias_attr:
         bias_size = [1, 3 * size]
         bias = helper.create_parameter(
             attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
-
-    gate = helper.create_tmp_variable(dtype)
-    reset_hidden_pre = helper.create_tmp_variable(dtype)
-    updated_hidden = helper.create_tmp_variable(dtype)
+        inputs['Bias'] = bias
 
     helper.append_op(
         type='gru_unit',
-        inputs={'Input': input,
-                'HiddenPrev': hidden,
-                'Weight': weight},
+        inputs=inputs,
         outputs={
             'Gate': gate,
             'ResetHiddenPrev': reset_hidden_pre,
             'Hidden': updated_hidden,
         },
         attrs={
-            'activation': 0,
-            'gate_activation': 1,
+            'activation': 2,  # tanh
+            'gate_activation': 1,  # sigmoid
         })
 
     return updated_hidden, reset_hidden_pre, gate
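
Note that the attrs now pin the activation enum to 2 (tanh) and 1 (sigmoid) rather than mapping the `activation`/`gate_activation` arguments, so callers get tanh/sigmoid regardless of what they pass. A minimal usage sketch of the renamed signature; the import path, `x_t_data`, `prev_hidden`, and the sizes are illustrative assumptions, not taken from this patch:

    import paddle.v2.fluid as fluid

    # prev_hidden: previous hidden state, dimension 10 (hypothetical).
    # gru_unit expects `input` already projected to 3 * hidden_dim,
    # so size=30 here and the layer divides by 3 internally.
    x_t = fluid.layers.fc(input=x_t_data, size=30)
    hidden, reset_hidden_pre, gate = fluid.layers.gru_unit(
        input=x_t, hidden=prev_hidden, size=30,
        param_attr=None, bias_attr=None)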
@@ -3733,8 +3730,8 @@ def label_smooth(label,
                  name=None):
     """
     Label smoothing is a mechanism to regularize the classifier layer and is
-    called label-smoothing regularization (LSR).
-
+    called label-smoothing regularization (LSR).
+
     Label smoothing is proposed to encourage the model to be less confident,
     since optimizing the log-likelihood of the correct label directly may
     cause overfitting and reduce the ability of the model to adapt. Label
@@ -3758,10 +3755,10 @@ def label_smooth(label,
         prior_dist(Variable): The prior distribution to be used to smooth
                               labels. If not provided, a uniform distribution
                               is used. The shape of :attr:`prior_dist` should
-                              be :math:`(1, class\_num)`.
+                              be :math:`(1, class\_num)`.
         epsilon(float): The weight used to mix up the original ground-truth
                         distribution and the fixed distribution.
-        dtype(np.dtype|core.VarDesc.VarType|str): The type of data: float32,
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of data: float32,
                                                   float64, int etc.
         name(str|None): A name for this layer (optional). If set None, the layer
                         will be named automatically.
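
Since the docstring only describes `epsilon` verbally, a quick illustration of the LSR mixing rule it refers to: the smoothed target is a convex combination of the one-hot label and the prior (uniform when `prior_dist` is None). This numpy sketch mirrors the arithmetic and is not the fluid op itself:

    import numpy as np

    epsilon = 0.1
    label = np.array([0., 0., 1., 0.])  # one-hot target, class_num = 4
    prior = np.full(4, 1. / 4)          # uniform prior when prior_dist is None

    smoothed = (1 - epsilon) * label + epsilon * prior
    print(smoothed)                     # [0.025 0.025 0.925 0.025]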