@@ -151,7 +151,7 @@ def embedding(input, size, is_sparse=False, param_attr=None, dtype='float32'):
 
     Args:
         input(Variable): Input to the function
-        size(tuple|list|None): Shape of the look up table parameter 
+        size(tuple|list|None): Shape of the look up table parameter
         is_sparse(bool): Boolean flag that specifying whether the input is sparse
         param_attr(ParamAttr): Parameters for this layer
         dtype(np.dtype|core.DataType|str): The type of data : float32, float_16, int etc
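
Note on the `size` argument above: the look up table parameter is a `[vocab_size, embedding_dim]` weight matrix that the integer inputs index into. A minimal numpy sketch of that lookup semantics (names and sizes illustrative, not the fluid operator itself):

```python
import numpy as np

# size=[vocab_size, embedding_dim] describes this table; each input id
# selects one row of it.
vocab_size, embedding_dim = 10000, 32
table = np.random.rand(vocab_size, embedding_dim).astype('float32')

ids = np.array([3, 17, 42])   # a batch of word ids
emb = table[ids]              # shape (3, 32): one embedding row per id
```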
@@ -366,9 +366,9 @@ def cross_entropy(input, label, **kwargs):
 
     1) One-hot cross-entropy:
         `soft_label = False`, `Label[i, 0]` indicates the class index for sample i:
-    
+
     .. math::
-    
+
         Y[i] = -\log(X[i, Label[i]])
 
     2) Soft-label cross-entropy:
@@ -386,15 +386,15 @@ def cross_entropy(input, label, **kwargs):
     As a special case of 2), when each row of 'label' has only one
     non-zero element which is equal to 1, soft-label cross-entropy degenerates
     to a one-hot cross-entropy with one-hot label representation.
-    
+
     Args:
-        input (Variable|list): a 2-D tensor with shape [N x D], where N is the 
-            batch size and D is the number of classes. This input is a probability 
+        input (Variable|list): a 2-D tensor with shape [N x D], where N is the
+            batch size and D is the number of classes. This input is a probability
             computed by the previous operator, which is almost always the result
             of a softmax operator.
-        label (Variable|list): the ground truth which is a 2-D tensor. When 
-            `soft_label` is set to `False`, `label` is a tensor<int64> with shape 
-            [N x 1]. When `soft_label` is set to `True`, `label` is a 
+        label (Variable|list): the ground truth which is a 2-D tensor. When
+            `soft_label` is set to `False`, `label` is a tensor<int64> with shape
+            [N x 1]. When `soft_label` is set to `True`, `label` is a
             tensor<float/double> with shape [N x D].
         soft_label (bool, via `**kwargs`): a flag indicating whether to interpretate
             the given labels as soft labels, default `False`.
@@ -403,7 +403,7 @@ def cross_entropy(input, label, **kwargs):
         A 2-D tensor with shape [N x 1], the cross entropy loss.
 
     Raises:
-        `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
+        `ValueError`: 1) the 1st dimension of `input` and `label` are not equal; 2) when \
             `soft_label == True`, and the 2nd dimension of `input` and `label` are not \
             equal; 3) when `soft_label == False`, and the 2nd dimension of `label` is not 1.
 
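
The two label modes and the `Raises` contract above can be summarized in a short numpy sketch; this mirrors the documented behavior, not the operator implementation:

```python
import numpy as np

def cross_entropy_np(x, label, soft_label=False):
    # x: [N x D] probabilities (e.g. softmax output); label per the docstring.
    if x.shape[0] != label.shape[0]:
        raise ValueError("the 1st dimension of input and label are not equal")
    if soft_label:
        if label.shape[1] != x.shape[1]:
            raise ValueError("the 2nd dimension of input and label are not equal")
        # 2) soft-label: Y[i] = -sum_j Label[i, j] * log(X[i, j])
        return -np.sum(label * np.log(x), axis=1, keepdims=True)     # [N x 1]
    if label.shape[1] != 1:
        raise ValueError("the 2nd dimension of label is not 1")
    # 1) one-hot: Y[i] = -log(X[i, Label[i]])
    n = x.shape[0]
    return -np.log(x[np.arange(n), label[:, 0]]).reshape(n, 1)       # [N x 1]
```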
@@ -727,9 +727,9 @@ def _get_default_param_initializer():
 
 def sequence_pool(input, pool_type, **kwargs):
     """
-    This function add the operator for sequence pooling. 
-    It pools features of all time-steps of each instance, and is applied 
-    on top of the input using pool_type mentioned in the parameters.
+    This function adds the operator for sequence pooling.
+    It pools features of all time-steps of each instance, and is applied
+    on top of the input using the pool_type mentioned in the parameters.
 
     It supports four pool_type:
 
@@ -758,7 +758,7 @@ def sequence_pool(input, pool_type, **kwargs):
 
     Args:
         input(variable): The input variable which is a LoDTensor.
-        pool_type (string): The pooling type of sequence_pool. 
+        pool_type (string): The pooling type of sequence_pool.
             It supports average, sum, sqrt and max.
 
     Returns:
@@ -768,7 +768,7 @@ def sequence_pool(input, pool_type, **kwargs):
 
         .. code-block:: python
 
-            x = fluid.layers.data(name='x', shape=[7, 1], 
+            x = fluid.layers.data(name='x', shape=[7, 1],
                          dtype='float32', lod_level=1)
             avg_x = fluid.layers.sequence_pool(input=x, pool_type='average')
             sum_x = fluid.layers.sequence_pool(input=x, pool_type='sum')
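
For reference, each of the four pool types collapses one sequence of the LoDTensor into a single time step. A numpy sketch of that behavior under an assumed two-sequence LoD; the `sqrt` variant here divides the sum by the square root of the sequence length, which should be treated as an assumption:

```python
import numpy as np

x = np.array([[1.], [3.], [2.], [4.], [6.]], dtype='float32')
offsets = [0, 2, 5]   # assumed LoD: sequence 0 is rows 0-1, sequence 1 is rows 2-4

def seq_pool_np(x, offsets, pool_type):
    out = []
    for begin, end in zip(offsets[:-1], offsets[1:]):
        seq = x[begin:end]
        if pool_type == 'average':
            out.append(seq.mean(axis=0))
        elif pool_type == 'sum':
            out.append(seq.sum(axis=0))
        elif pool_type == 'sqrt':
            out.append(seq.sum(axis=0) / np.sqrt(len(seq)))
        elif pool_type == 'max':
            out.append(seq.max(axis=0))
    return np.stack(out)                     # one row per sequence

print(seq_pool_np(x, offsets, 'average'))    # [[2.], [4.]]
```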
@@ -816,7 +816,7 @@ def sequence_first_step(input, **kwargs):
 
     .. code-block:: python
 
-        x = fluid.layers.data(name='x', shape=[7, 1], 
+        x = fluid.layers.data(name='x', shape=[7, 1],
                      dtype='float32', lod_level=1)
         x_first_step = fluid.layers.sequence_first_step(input=x)
     """
@@ -849,7 +849,7 @@ def sequence_last_step(input, **kwargs):
 
     .. code-block:: python
 
-        x = fluid.layers.data(name='x', shape=[7, 1], 
+        x = fluid.layers.data(name='x', shape=[7, 1],
                      dtype='float32', lod_level=1)
         x_last_step = fluid.layers.sequence_last_step(input=x)
     """
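
Both helpers pick a single time step per sequence, the first and the last respectively. Continuing the illustrative LoD layout from the sequence_pool sketch above:

```python
import numpy as np

x = np.array([[1.], [3.], [2.], [4.], [6.]], dtype='float32')
offsets = [0, 2, 5]   # assumed LoD: two sequences of lengths 2 and 3

x_first_step = np.stack([x[begin] for begin in offsets[:-1]])   # [[1.], [2.]]
x_last_step  = np.stack([x[end - 1] for end in offsets[1:]])    # [[3.], [6.]]
```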
@@ -1168,25 +1168,26 @@ def lstm_unit(x_t,
 
     .. math::
 
-        i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i)
+        i_t & = \sigma(W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i)
 
-        f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + W_{c_f}c_{t-1} + b_f)
+        f_t & = \sigma(W_{x_f}x_{t} + W_{h_f}h_{t-1} + b_f)
 
-        c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t+ W_{h_c}h_{t-1} + b_c)
+        c_t & = f_tc_{t-1} + i_t tanh (W_{x_c}x_t + W_{h_c}h_{t-1} + b_c)
 
-        o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + W_{c_o}c_t + b_o)
+        o_t & = \sigma(W_{x_o}x_{t} + W_{h_o}h_{t-1} + b_o)
 
         h_t & = o_t tanh(c_t)
 
-    The inputs of lstm unit includes :math:`x_t`, :math:`h_{t-1}` and
-    :math:`c_{t-1}`. The implementation separates the linear transformation
-    and non-linear transformation apart. Here, we take :math:`i_t` as an
-    example. The linear transformation is applied by calling a `fc` layer and
-    the equation is:
+    The inputs of lstm unit include :math:`x_t`, :math:`h_{t-1}` and
+    :math:`c_{t-1}`. The 2nd dimensions of :math:`h_{t-1}` and :math:`c_{t-1}`
+    should be the same. The implementation separates the linear transformation
+    and non-linear transformation apart. Here, we take :math:`i_t` as an
+    example. The linear transformation is applied by calling a `fc` layer and
+    the equation is:
 
     .. math::
 
-        L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + W_{c_i}c_{t-1} + b_i
+        L_{i_t} = W_{x_i}x_{t} + W_{h_i}h_{t-1} + b_i
 
     The non-linear transformation is applied by calling `lstm_unit_op` and the
     equation is:
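
The corrected equations drop the peephole terms (W_{c_i}, W_{c_f}, W_{c_o}) from the old docstring. A numpy sketch of the fc-plus-lstm_unit_op decomposition described above; the i/f/c/o ordering of the fused weight blocks is an assumption made for illustration:

```python
import numpy as np

def sigmoid(z):
    return 1. / (1. + np.exp(-z))

def lstm_unit_np(x_t, h_prev, c_prev, w_x, w_h, b, forget_bias=0.0):
    # Fused linear transformation, as the `fc` layer would compute it:
    # L = x_t . w_x + h_prev . w_h + b, with w_x: [N, 4S], w_h: [S, 4S], b: [4S].
    L = x_t.dot(w_x) + h_prev.dot(w_h) + b
    L_i, L_f, L_c, L_o = np.split(L, 4, axis=1)   # assumed gate ordering
    i_t = sigmoid(L_i)
    f_t = sigmoid(L_f + forget_bias)              # forget_bias shifts the forget gate
    c_t = f_t * c_prev + i_t * np.tanh(L_c)
    o_t = sigmoid(L_o)
    h_t = o_t * np.tanh(c_t)
    return h_t, c_t

M, N, S = 2, 10, 30   # batch size, input size, lstm size (as in the Args below)
h, c = lstm_unit_np(np.random.rand(M, N), np.random.rand(M, S),
                    np.random.rand(M, S), np.random.rand(N, 4 * S),
                    np.random.rand(S, 4 * S), np.zeros(4 * S))
print(h.shape, c.shape)   # (2, 30) (2, 30)
```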
@@ -1198,9 +1199,12 @@ def lstm_unit(x_t,
     This layer has two outputs including :math:`h_t` and :math:`o_t`.
 
     Args:
-        x_t (Variable): The input value of current step.
-        hidden_t_prev (Variable): The hidden value of lstm unit.
-        cell_t_prev (Variable): The cell value of lstm unit.
+        x_t (Variable): The input value of the current step, a 2-D tensor
+            with shape M x N, M for batch size and N for input size.
+        hidden_t_prev (Variable): The hidden value of lstm unit, a 2-D tensor
+            with shape M x S, M for batch size and S for size of lstm unit.
+        cell_t_prev (Variable): The cell value of lstm unit, a 2-D tensor with
+            shape M x S, M for batch size and S for size of lstm unit.
         forget_bias (float): The forget bias of lstm unit.
         param_attr (ParamAttr): The attributes of parameter weights, used to set
             initializer, name etc.
@@ -1213,14 +1217,15 @@ def lstm_unit(x_t,
     Raises:
         ValueError: The ranks of **x_t**, **hidden_t_prev** and **cell_t_prev**\
                 not be 2 or the 1st dimensions of **x_t**, **hidden_t_prev** \
-                and **cell_t_prev** not be the same.
+                and **cell_t_prev** not be the same or the 2nd dimensions of \
+                **hidden_t_prev** and **cell_t_prev** not be the same.
 
     Examples:
 
         .. code-block:: python
 
             x_t = fluid.layers.fc(input=x_t_data, size=10)
-            prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=20)
+            prev_hidden = fluid.layers.fc(input=prev_hidden_data, size=30)
             prev_cell = fluid.layers.fc(input=prev_cell_data, size=30)
             hidden_value, cell_value = fluid.layers.lstm_unit(x_t=x_t,
                                                    hidden_t_prev=prev_hidden,
@@ -1239,7 +1244,11 @@ def lstm_unit(x_t,
 
     if x_t.shape[0] != hidden_t_prev.shape[0] or x_t.shape[
             0] != cell_t_prev.shape[0]:
-        raise ValueError("The 1s dimension of x_t, hidden_t_prev and "
+        raise ValueError("The 1st dimensions of x_t, hidden_t_prev and "
+                         "cell_t_prev must be the same.")
+
+    if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
+        raise ValueError("The 2nd dimensions of hidden_t_prev and "
                          "cell_t_prev must be the same.")
 
     if bias_attr is None:
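
The added guard mirrors the documented requirement that hidden_t_prev and cell_t_prev share their 2nd dimension. A minimal reproduction of what the new check rejects (shapes illustrative):

```python
import numpy as np

hidden_t_prev = np.zeros((2, 30))   # M x S
cell_t_prev = np.zeros((2, 20))     # 2nd dimension deliberately differs

if hidden_t_prev.shape[1] != cell_t_prev.shape[1]:
    raise ValueError("The 2nd dimensions of hidden_t_prev and "
                     "cell_t_prev must be the same.")
```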
@@ -1268,17 +1277,17 @@ def lstm_unit(x_t,
 
 def reduce_sum(input, dim=None, keep_dim=False):
     """
-    Computes the sum of tensor elements over the given dimension. 
+    Computes the sum of tensor elements over the given dimension.
 
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        dim (int|None): The dimension along which the sum is performed. If 
-            :attr:`None`, sum all elements of :attr:`input` and return a 
-            Tensor variable with a single element, otherwise must be in the 
-            range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`, 
+        dim (int|None): The dimension along which the sum is performed. If
+            :attr:`None`, sum all elements of :attr:`input` and return a
+            Tensor variable with a single element, otherwise must be in the
+            range :math:`[-rank(input), rank(input))`. If :math:`dim < 0`,
             the dimension to reduce is :math:`rank + dim`.
-        keep_dim (bool): Whether to reserve the reduced dimension in the 
-            output Tensor. The result tensor will have one fewer dimension 
+        keep_dim (bool): Whether to reserve the reduced dimension in the
+            output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
 
     Returns:
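
The `dim`/`keep_dim` semantics documented above follow the usual reduction conventions; a numpy sketch with an illustrative matrix:

```python
import numpy as np

x = np.array([[0.2, 0.3, 0.5, 0.9],
              [0.1, 0.2, 0.6, 0.7]], dtype='float32')

np.sum(x)                          # dim=None -> single element: 3.5
np.sum(x, axis=0)                  # dim=0    -> shape (4,)
np.sum(x, axis=-1)                 # dim=-1   -> reduces dimension rank + dim = 1
np.sum(x, axis=1, keepdims=True)   # keep_dim -> shape (2, 1), rank preserved
```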
@@ -1312,17 +1321,17 @@ def reduce_sum(input, dim=None, keep_dim=False):
 
 def reduce_mean(input, dim=None, keep_dim=False):
     """
-    Computes the mean of tensor elements over the given dimension. 
+    Computes the mean of tensor elements over the given dimension.
 
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        dim (int|None): The dimension along which the mean is computed. If 
-            :attr:`None`, compute the mean over all elements of :attr:`input` 
-            and return a Tensor variable with a single element, otherwise 
-            must be in the range :math:`[-rank(input), rank(input))`. If 
+        dim (int|None): The dimension along which the mean is computed. If
+            :attr:`None`, compute the mean over all elements of :attr:`input`
+            and return a Tensor variable with a single element, otherwise
+            must be in the range :math:`[-rank(input), rank(input))`. If
             :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
-        keep_dim (bool): Whether to reserve the reduced dimension in the 
-            output Tensor. The result tensor will have one fewer dimension 
+        keep_dim (bool): Whether to reserve the reduced dimension in the
+            output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
 
     Returns:
@@ -1356,22 +1365,22 @@ def reduce_mean(input, dim=None, keep_dim=False):
 
 def reduce_max(input, dim=None, keep_dim=False):
     """
-    Computes the maximum of tensor elements over the given dimension. 
+    Computes the maximum of tensor elements over the given dimension.
 
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        dim (int|None): The dimension along which the maximum is computed. 
-            If :attr:`None`, compute the maximum over all elements of 
-            :attr:`input` and return a Tensor variable with a single element, 
-            otherwise must be in the range :math:`[-rank(input), rank(input))`. 
+        dim (int|None): The dimension along which the maximum is computed.
+            If :attr:`None`, compute the maximum over all elements of
+            :attr:`input` and return a Tensor variable with a single element,
+            otherwise must be in the range :math:`[-rank(input), rank(input))`.
             If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
-        keep_dim (bool): Whether to reserve the reduced dimension in the 
-            output Tensor. The result tensor will have one fewer dimension 
+        keep_dim (bool): Whether to reserve the reduced dimension in the
+            output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
 
     Returns:
         Variable: The reduced Tensor variable.
-    
+
     Examples:
         .. code-block:: python
 
@@ -1400,22 +1409,22 @@ def reduce_max(input, dim=None, keep_dim=False):
 
 def reduce_min(input, dim=None, keep_dim=False):
     """
-    Computes the minimum of tensor elements over the given dimension. 
+    Computes the minimum of tensor elements over the given dimension.
 
     Args:
         input (Variable): The input variable which is a Tensor or LoDTensor.
-        dim (int|None): The dimension along which the minimum is computed. 
-            If :attr:`None`, compute the minimum over all elements of 
-            :attr:`input` and return a Tensor variable with a single element, 
-            otherwise must be in the range :math:`[-rank(input), rank(input))`. 
+        dim (int|None): The dimension along which the minimum is computed.
+            If :attr:`None`, compute the minimum over all elements of
+            :attr:`input` and return a Tensor variable with a single element,
+            otherwise must be in the range :math:`[-rank(input), rank(input))`.
             If :math:`dim < 0`, the dimension to reduce is :math:`rank + dim`.
-        keep_dim (bool): Whether to reserve the reduced dimension in the 
-            output Tensor. The result tensor will have one fewer dimension 
+        keep_dim (bool): Whether to reserve the reduced dimension in the
+            output Tensor. The result tensor will have one fewer dimension
             than the :attr:`input` unless :attr:`keep_dim` is true.
 
     Returns:
         Variable: The reduced Tensor variable.
-    
+
     Examples:
         .. code-block:: python
 
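
The same `dim`/`keep_dim` contract holds for reduce_mean, reduce_max and reduce_min; continuing the numpy analogy with the same illustrative matrix as in the reduce_sum sketch:

```python
import numpy as np

x = np.array([[0.2, 0.3, 0.5, 0.9],
              [0.1, 0.2, 0.6, 0.7]], dtype='float32')

np.mean(x, axis=1, keepdims=True)   # reduce_mean analogue -> [[0.475], [0.4]]
np.max(x, axis=1)                   # reduce_max analogue  -> [0.9, 0.7]
np.min(x, axis=0)                   # reduce_min analogue  -> [0.1, 0.2, 0.5, 0.7]
```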