     'GradientClipByValue',
     'GradientClipByNorm',
     'GradientClipByGlobalNorm',
-    'append_gradient_clip_ops',
-    'error_clip_callback',
 ]


@@ -38,6 +36,25 @@ def append_clip_op(self, block, grad_name):


 class ErrorClipByValue(BaseErrorClipAttr):
+    """
+    Clips tensor values to the range [min, max].
+
+    Given a tensor t, this operation clips its value to min and max inplace.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+        it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
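
# Editor's sketch (not part of this commit): a minimal, self-contained way to
# attach ErrorClipByValue, following the docstring example above. The program,
# block, variable name and shape are illustrative assumptions; the error
# (backpropagated gradient) of `hidden` would be clipped to [-5.0, 5.0].
import paddle.fluid as fluid

prog = fluid.framework.Program()
block = prog.global_block()
hidden = block.create_var(
    name="hidden",
    shape=[64, 64],
    dtype="float32",
    error_clip=fluid.clip.ErrorClipByValue(max=5.0))
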
@@ -99,6 +116,31 @@ def create_operators(self, param, grad):


 class GradientClipByValue(BaseGradientClipAttr):
+    """
+    Clips gradient values to the range [min, max].
+
+    Given a tensor t, this operation clips its value to min and max inplace.
+
+    - Any values less than min are set to min.
+    - Any values greater than max are set to max.
+
+    Args:
+        max (float): The maximum value to clip by.
+        min (float, optional): The minimum value to clip by. If not set by the user, \
+        it will be set to -max by the framework.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByValue(min=-1.0, max=1.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+    """
+
     def __init__(self, max, min=None):
         max = float(max)
         if min is None:
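
# Editor's sketch (not part of this commit): a fuller version of the docstring
# example above with the surrounding pieces filled in. The input shape, layer
# size and variable names are illustrative assumptions; the ParamAttr keyword
# names follow the docstring example.
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[13], dtype="float32")

w_param_attrs = fluid.ParamAttr(
    name=None,
    initializer=fluid.initializer.UniformInitializer(low=-1.0, high=1.0, seed=0),
    learning_rate=1.0,
    regularizer=fluid.regularizer.L1Decay(1.0),
    trainable=True,
    # Each gradient element of this parameter is clipped to [-1.0, 1.0].
    clip=fluid.clip.GradientClipByValue(min=-1.0, max=1.0))

y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
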
@@ -120,6 +162,37 @@ def create_operators(self, param, grad):


 class GradientClipByNorm(BaseGradientClipAttr):
+    """
+    Clips tensor values to a maximum L2-norm.
+
+    This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
+    If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
+    will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
+    :math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
+    :math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
+
+    .. math::
+
+        Out = \\frac{max\_norm * X}{norm(X)},
+
+    where :math:`norm(X)` represents the L2 norm of :math:`X`.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+
+    Examples:
+        .. code-block:: python
+
+            w_param_attrs = ParamAttr(name=None,
+                                      initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
+                                      learning_rate=1.0,
+                                      regularizer=L1Decay(1.0),
+                                      trainable=True,
+                                      clip=GradientClipByNorm(clip_norm=2.0))
+            y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
+
+    """
+
     def __init__(self, clip_norm):
         self.clip_norm = clip_norm

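
# Editor's sketch (not part of this commit): a tiny NumPy illustration of the
# scaling rule above, separate from the fluid op itself; the gradient values
# are arbitrary.
import numpy as np

clip_norm = 2.0
g = np.array([3.0, 4.0])                                      # L2 norm is 5.0 > clip_norm
clipped = g * clip_norm / max(np.linalg.norm(g), clip_norm)
print(clipped)                                                # [1.2 1.6], whose L2 norm is 2.0
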
@@ -135,6 +208,44 @@ def create_operators(self, param, grad):


 class GradientClipByGlobalNorm(BaseGradientClipAttr):
+    """
+    Clips values of multiple tensors by the ratio of the sum of their norms.
+
+    Given a list of tensors t_list, and a clipping ratio clip_norm, this
+    operation returns a list of clipped tensors list_clipped and the global
+    norm (global_norm) of all tensors in t_list.
+
+    To perform the clipping, the values :math:`t\_list[i]` are set to:
+
+    .. math::
+
+        t\_list[i] = t\_list[i] * \\frac{clip\_norm}{\max(global\_norm, clip\_norm)}
+
+    where:
+
+    .. math::
+
+        global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
+
+    If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
+    otherwise they're all shrunk by the global ratio.
+
+    Args:
+        clip_norm (float): The maximum norm value.
+        group_name (str, optional): The group name for this clip.
+
+    Examples:
+        .. code-block:: python
+
+            p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
+
+            with fluid.program_guard(main_program=prog_clip):
+                fluid.clip.set_gradient_clip(
+                    fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
+                p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
+
+    """
+
     def __init__(self, clip_norm, group_name="default_group"):
         if not isinstance(group_name, basestring):
             raise TypeError("'group_name' must be a basestring.")
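
# Editor's sketch (not part of this commit): a small NumPy illustration of the
# global-norm rule above; the two arrays stand in for a list of gradients.
import numpy as np

clip_norm = 2.0
t_list = [np.array([3.0, 0.0]), np.array([0.0, 4.0])]
global_norm = np.sqrt(sum(np.sum(t ** 2) for t in t_list))    # 5.0
scale = clip_norm / max(global_norm, clip_norm)               # 0.4
clipped = [t * scale for t in t_list]                         # global norm is now 2.0
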
@@ -183,15 +294,16 @@ def create_operators(self, param, grad):

 def set_gradient_clip(clip, param_list=None, program=None):
     """
-    To specify parameters that require gradient clip.
-    Args:
-        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
-            which describes the type and detailed attributes of required gradient clip.
-        param_list(list, None by default): Parameters that require gradient clip.
-            It can be a list of parameter or a list of parameter's name.
-            When it's None, all parameters in the program will be included.
-        program(Program, None by default): The program where parameters are.
-            Will be the default main program when assigned with None.
+    To specify parameters that require gradient clipping.
+
+    Args:
+        clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
+            which describes the type and detailed attributes of the required gradient clipping.
+        param_list(list(Variable)): Parameters that require gradient clipping.
+            It can be a list of parameters or a list of parameter names.
+            When it is None, all parameters in the program will be included.
+        program(Program): The program where the parameters are.
+            Will be the default main program when assigned with None.
195307 """
196308 if not isinstance (clip , BaseGradientClipAttr ):
197309 raise TypeError (
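
# Editor's sketch (not part of this commit): an end-to-end sketch mirroring the
# GradientClipByGlobalNorm docstring example above; the network, names and
# shapes are illustrative assumptions.
import paddle.fluid as fluid

x = fluid.layers.data(name="x", shape=[13], dtype="float32")
y = fluid.layers.data(name="y", shape=[1], dtype="float32")
y_predict = fluid.layers.fc(input=x, size=1)
cost = fluid.layers.square_error_cost(input=y_predict, label=y)
avg_cost = fluid.layers.mean(cost)

p_g = fluid.backward.append_backward(loss=avg_cost)

# Attach the clip attribute to every parameter of the default main program,
# then rewrite the param/grad pairs so the clipping ops are inserted.
fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
p_g = fluid.clip.append_gradient_clip_ops(p_g)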