@@ -3,7 +3,7 @@
 from paddle.v2.fluid.framework import OpProtoHolder, Variable, Program, \
     Operator
 from paddle.v2.fluid.initializer import ConstantInitializer, \
-    NormalInitializer
+    NormalInitializer, XavierInitializer
 from paddle.v2.fluid.layer_helper import LayerHelper, unique_name
 import re
 import cStringIO
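
Note on the new import: XavierInitializer implements Xavier/Glorot initialization, which scales the weight variance by the layer's fan-in and fan-out so activations keep a roughly constant variance across layers. A minimal numpy sketch of the uniform variant (illustrative only; the real initializer runs as an op in the startup program):

    import numpy as np

    def xavier_uniform(fan_in, fan_out):
        # Glorot/Xavier uniform: limit = sqrt(6 / (fan_in + fan_out))
        limit = np.sqrt(6.0 / (fan_in + fan_out))
        return np.random.uniform(-limit, limit, size=(fan_in, fan_out))
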
@@ -18,7 +18,9 @@
 def fc(input,
        size,
        param_attr=None,
+       param_initializer=None,
        bias_attr=None,
+       bias_initializer=None,
        name=None,
        act=None,
        num_flatten_dims=1,
@@ -31,7 +33,11 @@ def fc(input,
     input: The input tensor to the function
     size: The size of the layer
     param_attr: The parameters/weights to the FC Layer
+    param_initializer: Initializer used for the weight/parameter.
+        If None, XavierInitializer() is used
     bias_attr: The bias parameter for the FC layer
+    bias_initializer: Initializer used for the bias.
+        If None, ConstantInitializer() is used
     name: Name/alias of the function
     act: Activation to be applied to the output of FC layer
     num_flatten_dims: Number of columns in input
@@ -50,18 +56,34 @@ def fc(input,
     to the LayerHelper constructor.
 
     """
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
     helper = LayerHelper('fc', **locals())
 
     dtype = helper.input_dtype()
 
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer()
+
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     mul_results = []
     for input_var, param_attr in helper.iter_inputs_and_params():
         input_shape = input_var.shape
         param_shape = [
             reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1)
         ] + [size]
         w = helper.create_parameter(
-            attr=param_attr, shape=param_shape, dtype=dtype)
+            attr=param_attr,
+            initializer=param_initializer,
+            shape=param_shape,
+            dtype=dtype)
         tmp = helper.create_tmp_variable(dtype)
         helper.append_op(
             type="mul",
@@ -82,7 +104,7 @@ def fc(input,
         helper.append_op(
             type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias})
     # add bias
-    pre_activation = helper.append_bias_op(pre_bias)
+    pre_activation = helper.append_bias_op(pre_bias, bias_initializer)
     # add activation
     return helper.append_activation(pre_activation)
 
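
With the changes above, callers can override either default. A hedged usage sketch; the data layer call and its data_type keyword follow the v2.fluid API of this era and are assumptions, not part of this diff:

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.initializer import XavierInitializer, ConstantInitializer

    # Assumed input variable; kwarg names are illustrative for this API era.
    x = layers.data(name='x', shape=[32], data_type='float32')

    # Passing the initializers explicitly is equivalent to the new defaults.
    out = layers.fc(input=x,
                    size=128,
                    act='relu',
                    param_initializer=XavierInitializer(),
                    bias_initializer=ConstantInitializer())
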
@@ -599,24 +621,41 @@ def sequence_conv(input,
                   act=None,
                   padding=None,
                   bias_attr=None,
+                  bias_initializer=None,
                   param_attr=None,
+                  param_initializer=None,
                   main_program=None,
                   startup_program=None):
     """
     This function creates the op for sequence_conv, using the inputs and
     other convolutional configurations for the filters and stride as given
     in the input parameters to the function.
     """
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
+    def _get_default_param_initializer():
+        return XavierInitializer()
+
     # FIXME(dzh): want to unify the argument of python layer
     # function. So we ignore some unnecessary attributes,
     # such as padding_trainable and context_start.
 
     helper = LayerHelper('sequence_conv', **locals())
     dtype = helper.input_dtype()
 
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer()
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     filter_shape = [filter_size * input.shape[1], num_filters]
     filter = helper.create_parameter(
-        attr=helper.param_attr, shape=filter_shape, dtype=dtype)
+        attr=helper.param_attr,
+        shape=filter_shape,
+        dtype=dtype,
+        initializer=param_initializer)
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
@@ -631,7 +670,7 @@ def sequence_conv(input,
             'contextStart': -int(filter_size / 2),
             'contextLength': filter_size
         })
-    pre_act = helper.append_bias_op(pre_bias)
+    pre_act = helper.append_bias_op(pre_bias, bias_initializer)
     return helper.append_activation(pre_act)
 
 
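
sequence_conv follows the same pattern; in this sketch only the weight initializer is overridden, so the bias falls back to ConstantInitializer() inside the layer. `seq` stands in for a LoD tensor variable created elsewhere, and the NormalInitializer(mean, std, seed) signature matches its use in conv2d below:

    import paddle.v2.fluid.layers as layers
    from paddle.v2.fluid.initializer import NormalInitializer

    conv_out = layers.sequence_conv(input=seq,
                                    num_filters=64,
                                    filter_size=3,
                                    act='tanh',
                                    param_initializer=NormalInitializer(0.0, 0.02, 0))
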
@@ -644,7 +683,9 @@ def conv2d(input,
            stride=[1, 1],
            padding=None,
            bias_attr=None,
+           bias_initializer=None,
            param_attr=None,
+           param_initializer=None,
            main_program=None,
            startup_program=None):
     """
@@ -654,6 +695,14 @@ def conv2d(input,
     This function can also append an activation on top of the
     conv-2d output, if mentioned in the input parameters.
     """
+
+    def _get_default_bias_initializer():
+        return ConstantInitializer()
+
+    def _get_default_param_initializer(filter_size, num_channels):
+        std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
+        return NormalInitializer(0.0, std, 0)
+
     helper = LayerHelper('conv2d', **locals())
     dtype = helper.input_dtype()
 
@@ -675,12 +724,17 @@ def conv2d(input,
     input_shape = input.shape
     filter_shape = [num_filters, num_filter_channels] + filter_size
 
-    std = (2.0 / (filter_size[0]**2 * num_channels))**0.5
+    if param_initializer is None:
+        param_initializer = _get_default_param_initializer(filter_size,
+                                                           num_channels)
+    if bias_initializer is None:
+        bias_initializer = _get_default_bias_initializer()
+
     filter = helper.create_parameter(
         attr=helper.param_attr,
         shape=filter_shape,
         dtype=dtype,
-        initializer=NormalInitializer(0.0, std, 0))
+        initializer=param_initializer)
     pre_bias = helper.create_tmp_variable(dtype)
 
     helper.append_op(
@@ -694,7 +748,8 @@ def conv2d(input,
             'paddings': padding,
             'groups': groups})
 
-    pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2)
+    pre_act = helper.append_bias_op(
+        pre_bias, bias_initializer, dim_start=1, dim_end=2)
 
     return helper.append_activation(pre_act)
 
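
For conv2d the refactor is behavior-preserving: the removed inline std computation reappears verbatim inside _get_default_param_initializer, a He-style normal initializer whose std shrinks with filter area and input channel count. A quick numeric check (values illustrative):

    # std = sqrt(2 / (k**2 * c)) for a k x k filter over c input channels,
    # mirroring _get_default_param_initializer above.
    filter_size, num_channels = [3, 3], 64
    std = (2.0 / (filter_size[0] ** 2 * num_channels)) ** 0.5
    print(round(std, 4))  # 0.0589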