@@ -727,8 +727,8 @@ def dynamic_gru(input,
 def gru_unit(input,
              hidden,
              size,
-             weight=None,
-             bias=None,
+             param_attr=None,
+             bias_attr=None,
              activation='tanh',
              gate_activation='sigmoid'):
     """
@@ -759,8 +759,8 @@ def gru_unit(input,
         input (Variable): The fc transformed input value of current step.
         hidden (Variable): The hidden value of gru unit from previous step.
         size (integer): The input dimension value.
-        weight (ParamAttr): The weight parameters for gru unit. Default: None
-        bias (ParamAttr): The bias parameters for gru unit. Default: None
+        param_attr (ParamAttr): The weight parameters for gru unit. Default: None
+        bias_attr (ParamAttr): The bias parameters for gru unit. Default: None
         activation (string): The activation type for cell (actNode).
                              Default: 'tanh'
         gate_activation (string): The activation type for gates (actGate).
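
For reference, the step these arguments configure is a single GRU update: `input` carries the already fc-transformed gate slices (hence the `size = size / 3` in the next hunk), and `activation`/`gate_activation` name actNode and actGate. One common formulation of that step, written here as illustration since gate conventions differ between implementations and this is not a statement of Paddle's exact kernel:

    u_t = actGate(x_u + U_u h_{t-1} + b_u)          (update gate)
    r_t = actGate(x_r + U_r h_{t-1} + b_r)          (reset gate)
    m_t = actNode(x_m + U_m (r_t * h_{t-1}) + b_m)  (candidate state)
    h_t = u_t * h_{t-1} + (1 - u_t) * m_t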
@@ -792,34 +792,31 @@ def gru_unit(input,
     size = size / 3
 
     # create weight
-    if weight is None:
-        weight = helper.create_parameter(
-            attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
+    weight = helper.create_parameter(
+        attr=helper.param_attr, shape=[size, 3 * size], dtype=dtype)
 
+    gate = helper.create_tmp_variable(dtype)
+    reset_hidden_pre = helper.create_tmp_variable(dtype)
+    updated_hidden = helper.create_tmp_variable(dtype)
+    inputs = {'Input': input, 'HiddenPrev': hidden, 'Weight': weight}
     # create bias
-
-    if bias is None:
+    if helper.bias_attr:
         bias_size = [1, 3 * size]
         bias = helper.create_parameter(
             attr=helper.bias_attr, shape=bias_size, dtype=dtype, is_bias=True)
-
-    gate = helper.create_tmp_variable(dtype)
-    reset_hidden_pre = helper.create_tmp_variable(dtype)
-    updated_hidden = helper.create_tmp_variable(dtype)
+        inputs['Bias'] = bias
 
     helper.append_op(
         type='gru_unit',
-        inputs={'Input': input,
-                'HiddenPrev': hidden,
-                'Weight': weight},
+        inputs=inputs,
         outputs={
             'Gate': gate,
             'ResetHiddenPrev': reset_hidden_pre,
             'Hidden': updated_hidden,
         },
         attrs={
-            'activation': 0,
-            'gate_activation': 1,
+            'activation': 2,  # tanh
+            'gate_activation': 1,  # sigmoid
         })
 
     return updated_hidden, reset_hidden_pre, gate
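
Note that the attrs now pin the activation enum to 2 (tanh) and 1 (sigmoid) rather than mapping the `activation`/`gate_activation` arguments, so callers get tanh/sigmoid regardless of what they pass. A minimal usage sketch of the renamed signature; the import path, `x_t_data`, `prev_hidden`, and the sizes are illustrative assumptions, not taken from this patch:

    import paddle.v2.fluid as fluid

    # prev_hidden: previous hidden state, dimension 10 (hypothetical).
    # gru_unit expects `input` already projected to 3 * hidden_dim,
    # so size=30 here and the layer divides by 3 internally.
    x_t = fluid.layers.fc(input=x_t_data, size=30)
    hidden, reset_hidden_pre, gate = fluid.layers.gru_unit(
        input=x_t, hidden=prev_hidden, size=30,
        param_attr=None, bias_attr=None)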
@@ -3733,8 +3730,8 @@ def label_smooth(label,
                  name=None):
     """
     Label smoothing is a mechanism to regularize the classifier layer and is
-    called label-smoothing regularization (LSR).
-
+    called label-smoothing regularization (LSR).
+
     Label smoothing is proposed to encourage the model to be less confident,
     since optimizing the log-likelihood of the correct label directly may
     cause overfitting and reduce the ability of the model to adapt. Label
@@ -3758,10 +3755,10 @@ def label_smooth(label,
         prior_dist(Variable): The prior distribution to be used to smooth
                               labels. If not provided, a uniform distribution
                               is used. The shape of :attr:`prior_dist` should
-                              be :math:`(1, class\_num)`.
+                              be :math:`(1, class\_num)`.
         epsilon(float): The weight used to mix up the original ground-truth
                         distribution and the fixed distribution.
-        dtype(np.dtype|core.VarDesc.VarType|str): The type of data: float32,
+        dtype(np.dtype|core.VarDesc.VarType|str): The type of data: float32,
                                                   float64, int etc.
         name(str|None): A name for this layer (optional). If set None, the layer
                         will be named automatically.
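
Since the docstring only describes `epsilon` verbally, a quick illustration of the LSR mixing rule it refers to: the smoothed target is a convex combination of the one-hot label and the prior (uniform when `prior_dist` is None). This numpy sketch mirrors the arithmetic and is not the fluid op itself:

    import numpy as np

    epsilon = 0.1
    label = np.array([0., 0., 1., 0.])  # one-hot target, class_num = 4
    prior = np.full(4, 1. / 4)          # uniform prior when prior_dist is None

    smoothed = (1 - epsilon) * label + epsilon * prior
    print(smoothed)                     # [0.025 0.025 0.925 0.025]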