diff --git a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc index d9e40546523c60..ce51f4f8318163 100644 --- a/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc +++ b/paddle/operators/sigmoid_cross_entropy_with_logits_op.cc @@ -99,7 +99,7 @@ class SigmoidCrossEntropyWithLogitsOpMaker "log(p/(1-p))."); AddInput("Labels", "(Tensor, default Tensor), a 2-D tensor of the same type " - "and shape as X. This input is a tensor of probabalistic labels " + "and shape as X. This input is a tensor of probabilistic labels " "for each logit"); AddOutput("Out", "(Tensor, default Tensor), a 2-D tensor with shape N x D " diff --git a/python/paddle/v2/fluid/__init__.py b/python/paddle/v2/fluid/__init__.py index 9677c9568c6783..c033b27beab52a 100644 --- a/python/paddle/v2/fluid/__init__.py +++ b/python/paddle/v2/fluid/__init__.py @@ -13,13 +13,14 @@ import optimizer import backward import regularizer +from param_attr import ParamAttr from core import LoDTensor, CPUPlace, GPUPlace Tensor = LoDTensor __all__ = framework.__all__ + executor.__all__ + [ 'io', 'initializer', 'layers', 'nets', 'optimizer', 'backward', - 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor' + 'regularizer', 'LoDTensor', 'CPUPlace', 'GPUPlace', 'Tensor', 'ParamAttr' ] diff --git a/python/paddle/v2/fluid/framework.py b/python/paddle/v2/fluid/framework.py index 1c42e4d44f5046..96fbd4b7027529 100644 --- a/python/paddle/v2/fluid/framework.py +++ b/python/paddle/v2/fluid/framework.py @@ -3,10 +3,11 @@ import numpy as np from . import core import proto.framework_pb2 as framework_pb2 +import contextlib __all__ = [ 'Block', 'Variable', 'Program', 'Operator', 'default_startup_program', - 'default_main_program' + 'default_main_program', 'program_guard' ] @@ -659,8 +660,83 @@ def __init__(self, block, shape, dtype, **kwargs): def default_startup_program(): + """ + Get default startup program. 
In startup program, Paddle will initialize + parameters, initialize nccl handle, etc. + + Returns: + Program: startup program + """ return _startup_program_ def default_main_program(): + """ + Get default main program. The main program is used for training or testing. + + Returns: + Program: main program + """ return _main_program_ + + +def switch_main_program(program): + """ + Switch the main program to a new program. + + Args: + program(Program): The new main program + + Returns: + Program: The previous main program + """ + global _main_program_ + prev_program = _main_program_ + _main_program_ = program + return prev_program + + +def switch_startup_program(program): + """ + Switch the startup program to a new program + Args: + program(Program): The new startup program + + Returns: + Program: The previous startup program + """ + global _startup_program_ + prev_program = _startup_program_ + _startup_program_ = program + return prev_program + + +@contextlib.contextmanager +def program_guard(main_program, startup_program=None): + """ + Switch program with `with` statement + + Examples: + >>> with program_guard(Program()): + >>> data = fluid.layers.data(...) + >>> hidden = fluid.layers.fc(...) + + Args: + main_program(Program): New main program inside `with` statement + startup_program(Program): New startup program inside `with` statement. + None means do not change startup program. 
+ + Returns: + None + """ + if not isinstance(main_program, Program): + raise TypeError("main_program should be Program") + main_program = switch_main_program(main_program) + if startup_program is not None: + if not isinstance(startup_program, Program): + raise TypeError("startup_program should be Program") + startup_program = switch_startup_program(startup_program) + yield + switch_main_program(main_program) + if startup_program is not None: + switch_startup_program(startup_program) diff --git a/python/paddle/v2/fluid/layer_helper.py b/python/paddle/v2/fluid/layer_helper.py index 7762b0d88f3a62..5b384e5cf5df5e 100644 --- a/python/paddle/v2/fluid/layer_helper.py +++ b/python/paddle/v2/fluid/layer_helper.py @@ -1,8 +1,10 @@ import copy import itertools -from framework import Variable, default_main_program, default_startup_program, unique_name, dtype_is_floating +from framework import Variable, default_main_program, default_startup_program, \ + unique_name, dtype_is_floating from paddle.v2.fluid.initializer import Constant, Xavier +from param_attr import ParamAttr class LayerHelper(object): @@ -59,31 +61,15 @@ def input(self, input_param_name='input'): @property def param_attr(self): - default = {'name': None} - actual = self.kwargs.get('param_attr', None) - if actual is None: - actual = default - for default_field in default.keys(): - if default_field not in actual: - actual[default_field] = default[default_field] - return actual + return ParamAttr.to_attr(self.kwargs.get('param_attr', None)) @property def bias_attr(self): - default = {'name': None} - bias_attr = self.kwargs.get('bias_attr', None) - if bias_attr is None: - bias_attr = default - - if isinstance(bias_attr, dict): - for default_field in default.keys(): - if default_field not in bias_attr: - bias_attr[default_field] = default[default_field] - return bias_attr + return ParamAttr.to_attr(self.kwargs.get('bias_attr', None)) def multiple_param_attr(self, length): param_attr = self.param_attr - if 
isinstance(param_attr, dict): + if isinstance(param_attr, ParamAttr): param_attr = [param_attr] if len(param_attr) != 1 and len(param_attr) != length: @@ -111,23 +97,30 @@ def input_dtype(self, input_param_name='input'): raise ValueError("Data Type mismatch") return dtype - def create_parameter(self, attr, shape, dtype, suffix='w', - initializer=None): + def create_parameter(self, + attr, + shape, + dtype, + is_bias=False, + default_initializer=None): # Deepcopy the attr so that parameters can be shared in program - attr_copy = copy.deepcopy(attr) - if initializer is not None: - attr_copy['initializer'] = initializer + assert isinstance(attr, ParamAttr) + suffix = 'b' if is_bias else 'w' + + if default_initializer is None: + if is_bias: + attr.set_default_bias_initializer() + else: + attr.set_default_param_initializer() else: - attr_copy['initializer'] = self._get_default_initializer(dtype) - if attr_copy['name'] is None: - attr_copy['name'] = unique_name(".".join([self.name, suffix])) + attr.set_default_initializer(default_initializer) + if attr.name is None: + attr.name = unique_name(".".join([self.name, suffix])) + self.startup_program.global_block().create_parameter( - dtype=dtype, shape=shape, **attr_copy) + dtype=dtype, shape=shape, **attr.to_kwargs(with_initializer=True)) return self.main_program.global_block().create_parameter( - name=attr_copy['name'], - dtype=dtype, - shape=shape, - trainable=attr_copy.get('trainable', True)) + dtype=dtype, shape=shape, **attr.to_kwargs()) def create_tmp_variable(self, dtype): return self.main_program.current_block().create_var( @@ -152,11 +145,7 @@ def set_variable_initializer(self, var, initializer): persistable=True, initializer=initializer) - def append_bias_op(self, - input_var, - bias_initializer, - dim_start=1, - dim_end=None): + def append_bias_op(self, input_var, dim_start=1, dim_end=None): """ Append bias operator and return its output. 
If the user does not set bias_attr, append_bias_op will return input_var @@ -176,11 +165,7 @@ def append_bias_op(self, return input_var b = self.create_parameter( - attr=bias_attr, - shape=size, - dtype=input_var.dtype, - suffix='b', - initializer=bias_initializer) + attr=bias_attr, shape=size, dtype=input_var.dtype, is_bias=True) tmp = self.create_tmp_variable(dtype=input_var.dtype) self.append_op( type='elementwise_add', diff --git a/python/paddle/v2/fluid/layers.py b/python/paddle/v2/fluid/layers.py index 6adfac3a32c7c8..0afa2df558a80e 100644 --- a/python/paddle/v2/fluid/layers.py +++ b/python/paddle/v2/fluid/layers.py @@ -5,6 +5,7 @@ from paddle.v2.fluid.layer_helper import LayerHelper, unique_name import re import cStringIO +from param_attr import ParamAttr __all__ = [ 'fc', 'data', 'cross_entropy', 'conv2d', 'pool2d', 'embedding', 'concat', @@ -17,9 +18,7 @@ def fc(input, size, num_flatten_dims=1, param_attr=None, - param_initializer=None, bias_attr=None, - bias_initializer=None, act=None, name=None, main_program=None, @@ -54,23 +53,10 @@ def fc(input, to the LayerHelper constructor. 
""" - - def _get_default_param_initializer(): - return Xavier() - - def _get_default_bias_initializer(): - return Constant() - helper = LayerHelper('fc', **locals()) dtype = helper.input_dtype() - if param_initializer is None: - param_initializer = _get_default_param_initializer() - - if bias_initializer is None: - bias_initializer = _get_default_bias_initializer() - mul_results = [] for input_var, param_attr in helper.iter_inputs_and_params(): input_shape = input_var.shape @@ -78,10 +64,7 @@ def _get_default_bias_initializer(): reduce(lambda a, b: a * b, input_shape[num_flatten_dims:], 1) ] + [size] w = helper.create_parameter( - attr=param_attr, - initializer=param_initializer, - shape=param_shape, - dtype=dtype) + attr=param_attr, shape=param_shape, dtype=dtype, is_bias=False) tmp = helper.create_tmp_variable(dtype) helper.append_op( type="mul", @@ -102,7 +85,7 @@ def _get_default_bias_initializer(): helper.append_op( type="sum", inputs={"X": mul_results}, outputs={"Out": pre_bias}) # add bias - pre_activation = helper.append_bias_op(pre_bias, bias_initializer) + pre_activation = helper.append_bias_op(pre_bias) # add activation return helper.append_activation(pre_activation) @@ -110,7 +93,6 @@ def _get_default_bias_initializer(): def embedding(input, size, is_sparse=False, - param_initializer=None, param_attr=None, dtype='float32', main_program=None, @@ -119,6 +101,7 @@ def embedding(input, Embedding Layer. 
Args: + param_initializer: input: The input to the function size: The size of the layer is_sparse: A flag that decleares whether the input is sparse @@ -136,15 +119,9 @@ def embedding(input, """ - def _get_default_param_initializer(): - return Xavier() - helper = LayerHelper('embedding', **locals()) w = helper.create_parameter( - attr=helper.param_attr, - shape=size, - dtype=dtype, - initializer=param_initializer or _get_default_param_initializer()) + attr=helper.param_attr, shape=size, dtype=dtype, is_bias=False) tmp = helper.create_tmp_variable(dtype) helper.append_op( type='lookup_table', @@ -423,6 +400,7 @@ def func(**kwargs): _create_op_func_('scale') _create_op_func_('reshape') _create_op_func_('transpose') +_create_op_func_('sigmoid_cross_entropy_with_logits') def cast(x, dtype, main_program=None): @@ -471,19 +449,14 @@ def sums(input, out=None, main_program=None, startup_program=None): def linear_chain_crf(input, label, param_attr=None, - param_initializer=None, main_program=None, startup_program=None): - def _get_default_param_initializer(): - return Xavier() - helper = LayerHelper('linear_chain_crf', **locals()) size = input.shape[1] transition = helper.create_parameter( attr=helper.param_attr, shape=[size + 2, size], - dtype=helper.input_dtype(), - initializer=param_initializer or _get_default_param_initializer()) + dtype=helper.input_dtype()) alpha = helper.create_tmp_variable(dtype=helper.input_dtype()) emission_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) transition_exps = helper.create_tmp_variable(dtype=helper.input_dtype()) @@ -646,9 +619,7 @@ def sequence_conv(input, filter_stride=1, padding=None, bias_attr=None, - bias_initializer=None, param_attr=None, - param_initializer=None, act=None, main_program=None, startup_program=None): @@ -658,30 +629,15 @@ def sequence_conv(input, in the input parameters to the function. 
""" - def _get_default_bias_initializer(): - return Constant() - - def _get_default_param_initializer(): - return Xavier() - # FIXME(dzh) : want to unify the argument of python layer # function. So we ignore some unecessary attributes. # such as, padding_trainable, context_start. helper = LayerHelper('sequence_conv', **locals()) dtype = helper.input_dtype() - - if param_initializer is None: - param_initializer = _get_default_param_initializer() - if bias_initializer is None: - bias_initializer = _get_default_bias_initializer() - filter_shape = [filter_size * input.shape[1], num_filters] filter = helper.create_parameter( - attr=helper.param_attr, - shape=filter_shape, - dtype=dtype, - initializer=param_initializer) + attr=helper.param_attr, shape=filter_shape, dtype=dtype) pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -696,7 +652,7 @@ def _get_default_param_initializer(): 'contextStart': -int(filter_size / 2), 'contextLength': filter_size }) - pre_act = helper.append_bias_op(pre_bias, bias_initializer) + pre_act = helper.append_bias_op(pre_bias) return helper.append_activation(pre_act) @@ -707,9 +663,7 @@ def conv2d(input, padding=None, groups=None, param_attr=None, - param_initializer=None, bias_attr=None, - bias_initializer=None, act=None, name=None, main_program=None, @@ -722,13 +676,6 @@ def conv2d(input, conv-2d output, if mentioned in the input parameters. 
""" - def _get_default_bias_initializer(): - return Constant() - - def _get_default_param_initializer(filter_size, num_channels): - std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 - return Normal(0.0, std, 0) - helper = LayerHelper('conv2d', **locals()) dtype = helper.input_dtype() @@ -750,17 +697,16 @@ def _get_default_param_initializer(filter_size, num_channels): input_shape = input.shape filter_shape = [num_filters, num_filter_channels] + filter_size - if param_initializer is None: - param_initializer = _get_default_param_initializer(filter_size, - num_channels) - if bias_initializer is None: - bias_initializer = _get_default_bias_initializer() + def _get_default_param_initializer(): + std = (2.0 / (filter_size[0]**2 * num_channels))**0.5 + return Normal(0.0, std, 0) filter = helper.create_parameter( attr=helper.param_attr, shape=filter_shape, dtype=dtype, - initializer=param_initializer) + default_initializer=_get_default_param_initializer()) + pre_bias = helper.create_tmp_variable(dtype) helper.append_op( @@ -774,8 +720,7 @@ def _get_default_param_initializer(filter_size, num_channels): 'paddings': padding, 'groups': groups}) - pre_act = helper.append_bias_op( - pre_bias, bias_initializer, dim_start=1, dim_end=2) + pre_act = helper.append_bias_op(pre_bias, dim_start=1, dim_end=2) return helper.append_activation(pre_act) @@ -876,12 +821,10 @@ def batch_norm(input, attr=helper.param_attr, shape=param_shape, dtype=dtype, - initializer=Constant(1.0)) + default_initializer=Constant(1.0)) + bias = helper.create_parameter( - attr=helper.param_attr, - shape=param_shape, - dtype=dtype, - initializer=Constant(0.0)) + attr=helper.param_attr, shape=param_shape, dtype=dtype, is_bias=True) mean = helper.create_global_variable( dtype=input.dtype, shape=param_shape, persistable=True) @@ -1356,7 +1299,7 @@ def lod_rank_table(x, level=0, main_program=None): def max_sequence_len(rank_table, main_program=None): """ - This function creates an operator to calculate the length 
of + This function creates an operator to calculate the length of max seqence through input rank_table(should be a lod_rank_table) """ helper = LayerHelper("max_seqence_len", **locals()) @@ -1594,35 +1537,34 @@ def conv2d_transpose(input, padding=None, stride=None, param_attr=None, - param_initializer=None, main_program=None, - startup_program=None): + startup_program=None, + act=None): """ The transpose of conv2d layer. - + This layer is also known as deconvolution layer. - + Args: input(Variable): The input image with [N, C, H, W] format. num_filters(int): The number of filter. It is as same as the output image channel. output_size(int|tuple|None): The output image size. If output size is a - tuple, it must contain two integers, (image_H, image_W). This + tuple, it must contain two integers, (image_H, image_W). This parameter only works when filter_size is None. filter_size(int|tuple|None): The filter size. If filter_size is a tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise, the filter will be a square. None if use output size to calculate filter_size padding(int|tuple): The padding size. If padding is a tuple, it must - contain two integers, (padding_H, padding_W). Otherwise, the + contain two integers, (padding_H, padding_W). Otherwise, the padding_H = padding_W = padding. stride(int|tuple): The stride size. If stride is a tuple, it must contain two integers, (stride_H, stride_W). Otherwise, the stride_H = stride_W = stride. param_attr: Parameter Attribute. - param_initializer(Initializer): Parameter Initializer. Default is Xavier main_program(Program): the main program - startup_program(Program): the startup program + startup_program(Program): the startup program Returns: Variable: Output image. 
@@ -1663,10 +1605,7 @@ def conv2d_transpose(input, filter_shape = [input_channel, num_filters] + filter_size img_filter = helper.create_parameter( - dtype=input.dtype, - shape=filter_shape, - attr=helper.param_attr, - initializer=param_initializer) + dtype=input.dtype, shape=filter_shape, attr=helper.param_attr) out = helper.create_tmp_variable(dtype=input.dtype) helper.append_op( @@ -1675,7 +1614,8 @@ def conv2d_transpose(input, 'Filter': [img_filter]}, outputs={'Output': out}, attrs=op_attr) - return out + + return helper.append_activation(out) class ConditionalBlockGuard(BlockGuard): diff --git a/python/paddle/v2/fluid/optimizer.py b/python/paddle/v2/fluid/optimizer.py index 934e024742fd00..aa8cfddd90821f 100644 --- a/python/paddle/v2/fluid/optimizer.py +++ b/python/paddle/v2/fluid/optimizer.py @@ -35,7 +35,8 @@ def _append_optimize_op(self, block, param_and_grad): def _create_param_lr(self, param_and_grad): # create learning rate variable for every parameter param = param_and_grad[0] - param_lr = param.optimize_attr['learning_rate'] + param_lr = getattr(param, 'optimize_attr', + {'learning_rate': 1.0})['learning_rate'] param_lr_shape = [1] param_lr_var = self.helper.create_global_variable( name=unique_name("learning_rate"), diff --git a/python/paddle/v2/fluid/param_attr.py b/python/paddle/v2/fluid/param_attr.py new file mode 100644 index 00000000000000..802c983568575d --- /dev/null +++ b/python/paddle/v2/fluid/param_attr.py @@ -0,0 +1,59 @@ +from initializer import Initializer, Xavier, Constant +from regularizer import WeightDecayRegularizer + + +class ParamAttr(object): + def __init__(self, + name=None, + initializer=None, + learning_rate=1.0, + regularizer=None, + trainable=True): + self.name = name + self.initializer = initializer + self.learning_rate = learning_rate + self.regularizer = regularizer + self.trainable = trainable + + def set_default_initializer(self, initializer): + if initializer is None: + if self.initializer is None: + raise 
ValueError("ParamAttr.initializer is not set") + return + + if self.initializer is not None: + return + + self.initializer = initializer + + def set_default_param_initializer(self): + self.set_default_initializer(Xavier()) + + def set_default_bias_initializer(self): + self.set_default_initializer(Constant(0.0)) + + @staticmethod + def to_attr(arg): + if arg is None: + return ParamAttr() + elif isinstance(arg, str) or isinstance(arg, unicode): + return ParamAttr(name=arg) + elif isinstance(arg, Initializer): + return ParamAttr(initializer=arg) + elif isinstance(arg, WeightDecayRegularizer): + return ParamAttr(regularizer=arg) + elif isinstance(arg, bool): + return ParamAttr.to_attr(None) if arg else False + else: + raise TypeError("{0} cast to ParamAttr".format(type(arg))) + + def to_kwargs(self, with_initializer=False): + kwargs = { + 'name': self.name, + 'learning_rate': self.learning_rate, + 'regularizer': self.regularizer, + 'trainable': self.trainable + } + if with_initializer: + kwargs['initializer'] = self.initializer + return kwargs diff --git a/python/paddle/v2/fluid/regularizer.py b/python/paddle/v2/fluid/regularizer.py index c2c18e1951234f..1359af4485b55d 100644 --- a/python/paddle/v2/fluid/regularizer.py +++ b/python/paddle/v2/fluid/regularizer.py @@ -25,7 +25,7 @@ def append_regularization_ops(parameters_and_grads): for param, grad in parameters_and_grads: # If no gradient or no regularization specified, # then we don't need to do anything - if grad is None or param.regularizer is None: + if grad is None or getattr(param, 'regularizer', None) is None: params_and_grads.append((param, grad)) continue diff --git a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py index 93987a2b80dc9c..bcd6f4d6bc66fd 100644 --- a/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py +++ b/python/paddle/v2/fluid/tests/book/test_label_semantic_roles.py @@ -44,7 +44,7 @@ def db_lstm(): 
size=[pred_len, word_dim], dtype='float32', is_sparse=IS_SPARSE, - param_attr={'name': 'vemb'}) + param_attr='vemb') mark_embedding = fluid.layers.embedding( input=mark, @@ -57,8 +57,8 @@ def db_lstm(): fluid.layers.embedding( size=[word_dict_len, word_dim], input=x, - param_attr={'name': embedding_name, - 'trainable': False}) for x in word_input + param_attr=fluid.ParamAttr( + name=embedding_name, trainable=False)) for x in word_input ] emb_layers.append(predicate_embedding) emb_layers.append(mark_embedding) @@ -125,8 +125,8 @@ def main(): crf_cost = fluid.layers.linear_chain_crf( input=feature_out, label=target, - param_attr={"name": 'crfw', - "learning_rate": mix_hidden_lr}) + param_attr=fluid.ParamAttr( + name='crfw', learning_rate=mix_hidden_lr)) avg_cost = fluid.layers.mean(x=crf_cost) # TODO(qiao) # 1. add crf_decode_layer and evaluator diff --git a/python/paddle/v2/fluid/tests/book/test_recommender_system.py b/python/paddle/v2/fluid/tests/book/test_recommender_system.py index f8dc1518579d5a..db91ca4f9c7d17 100644 --- a/python/paddle/v2/fluid/tests/book/test_recommender_system.py +++ b/python/paddle/v2/fluid/tests/book/test_recommender_system.py @@ -24,7 +24,7 @@ def get_usr_combined_features(): input=uid, dtype='float32', size=[USR_DICT_SIZE, 32], - param_attr={'name': 'user_table'}, + param_attr='user_table', is_sparse=IS_SPARSE) usr_fc = layers.fc(input=usr_emb, size=32) @@ -36,7 +36,7 @@ def get_usr_combined_features(): usr_gender_emb = layers.embedding( input=usr_gender_id, size=[USR_GENDER_DICT_SIZE, 16], - param_attr={'name': 'gender_table'}, + param_attr='gender_table', is_sparse=IS_SPARSE) usr_gender_fc = layers.fc(input=usr_gender_emb, size=16) @@ -48,7 +48,7 @@ def get_usr_combined_features(): input=usr_age_id, size=[USR_AGE_DICT_SIZE, 16], is_sparse=IS_SPARSE, - param_attr={'name': 'age_table'}) + param_attr='age_table') usr_age_fc = layers.fc(input=usr_age_emb, size=16) @@ -58,7 +58,7 @@ def get_usr_combined_features(): usr_job_emb = 
layers.embedding( input=usr_job_id, size=[USR_JOB_DICT_SIZE, 16], - param_attr={'name': 'job_table'}, + param_attr='job_table', is_sparse=IS_SPARSE) usr_job_fc = layers.fc(input=usr_job_emb, size=16) @@ -81,7 +81,7 @@ def get_mov_combined_features(): input=mov_id, dtype='float32', size=[MOV_DICT_SIZE, 32], - param_attr={'name': 'movie_table'}, + param_attr='movie_table', is_sparse=IS_SPARSE) mov_fc = layers.fc(input=mov_emb, size=32) diff --git a/python/paddle/v2/fluid/tests/book/test_word2vec.py b/python/paddle/v2/fluid/tests/book/test_word2vec.py index b0cd1a518cd1be..92d3629d42613e 100644 --- a/python/paddle/v2/fluid/tests/book/test_word2vec.py +++ b/python/paddle/v2/fluid/tests/book/test_word2vec.py @@ -23,25 +23,25 @@ size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, - param_attr={'name': 'shared_w'}) + param_attr='shared_w') embed_second = fluid.layers.embedding( input=second_word, size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, - param_attr={'name': 'shared_w'}) + param_attr='shared_w') embed_third = fluid.layers.embedding( input=third_word, size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, - param_attr={'name': 'shared_w'}) + param_attr='shared_w') embed_forth = fluid.layers.embedding( input=forth_word, size=[dict_size, EMBED_SIZE], dtype='float32', is_sparse=IS_SPARSE, - param_attr={'name': 'shared_w'}) + param_attr='shared_w') concat_embed = fluid.layers.concat( input=[embed_first, embed_second, embed_third, embed_forth], axis=1) diff --git a/python/paddle/v2/fluid/tests/experimental/.gitignore b/python/paddle/v2/fluid/tests/experimental/.gitignore new file mode 100644 index 00000000000000..333b7d94c781b4 --- /dev/null +++ b/python/paddle/v2/fluid/tests/experimental/.gitignore @@ -0,0 +1,2 @@ +model_* +out diff --git a/python/paddle/v2/fluid/tests/experimental/test_dcgan.py b/python/paddle/v2/fluid/tests/experimental/test_dcgan.py new file mode 100644 index 00000000000000..6ee0e197cb1237 --- 
/dev/null +++ b/python/paddle/v2/fluid/tests/experimental/test_dcgan.py @@ -0,0 +1,165 @@ +import paddle.v2.fluid as fluid +import paddle.v2 as paddle +import numpy +import os + + +class Counter(object): + def __init__(self): + self.counter = 0 + + def __call__(self, *args, **kwargs): + tmp = self.counter + self.counter += 1 + return str(tmp) + + def __str__(self): + return self.__call__() + + +def D(x): + c = Counter() + conv1 = fluid.layers.conv2d( + input=x, + num_filters=20, + filter_size=5, + act='relu', + param_attr='D_{0}'.format(c), + bias_attr='D_{0}'.format(c)) + pool1 = fluid.layers.pool2d(input=conv1, pool_size=2, pool_type='avg') + conv2 = fluid.layers.conv2d( + input=pool1, + num_filters=20, + filter_size=5, + act='relu', + param_attr='D_{0}'.format(c), + bias_attr='D_{0}'.format(c)) + pool2 = fluid.layers.pool2d(input=conv2, pool_size=2, pool_type='avg') + logits = fluid.layers.fc(input=pool2, + size=1, + act=None, + param_attr='D_{0}'.format(c), + bias_attr='D_{0}'.format(c)) + return logits + + +def G(x): + conv1 = fluid.layers.conv2d_transpose( + input=x, num_filters=10, output_size=14, act='relu', param_attr="G_0") + conv2 = fluid.layers.conv2d_transpose( + input=conv1, + num_filters=1, + output_size=28, + act='tanh', + param_attr='G_1') + return conv2 + + +def plot(gen_data): + gen_data.resize(gen_data.shape[0], 28, 28) + n = int(math.ceil(math.sqrt(gen_data.shape[0]))) + fig = plt.figure(figsize=(n, n)) + gs = gridspec.GridSpec(n, n) + gs.update(wspace=0.05, hspace=0.05) + + for i, sample in enumerate(gen_data): + ax = plt.subplot(gs[i]) + plt.axis('off') + ax.set_xticklabels([]) + ax.set_yticklabels([]) + ax.set_aspect('equal') + plt.imshow(sample.reshape(28, 28), cmap='Greys_r') + + return fig + + +NOISE_SIZE = 100 +NUM_PASS = 10 + + +def main(): + startup_program = fluid.Program() + d_program = fluid.Program() + dg_program = fluid.Program() + with fluid.program_guard(d_program, startup_program): + img = fluid.layers.data(name='img', 
shape=[1, 28, 28], dtype='float32') + logit = D(img) + d_loss = fluid.layers.sigmoid_cross_entropy_with_logits( + x=logit, + labels=fluid.layers.data( + name='label', shape=[1], dtype='float32')) + d_loss = fluid.layers.mean(x=d_loss) + + with fluid.program_guard(dg_program, startup_program): + noise = fluid.layers.data( + name='noise', shape=[NOISE_SIZE, 1, 1], dtype='float32') + g_img = G(x=noise) + g_program = dg_program.clone() + logit = D(g_img) + dg_loss = fluid.layers.sigmoid_cross_entropy_with_logits( + x=logit, + labels=fluid.layers.fill_constant_batch_size_like( + input=noise, dtype='float32', shape=[-1, 1], value=1.0)) + dg_loss = fluid.layers.mean(x=dg_loss) + + opt = fluid.optimizer.Adam(learning_rate=1e-5) + + opt.minimize(loss=d_loss, startup_program=startup_program) + opt.minimize( + loss=dg_loss, + startup_program=startup_program, + parameter_list=[ + p.name for p in g_program.global_block().all_parameters() + ]) + exe = fluid.Executor(fluid.CPUPlace()) + exe.run(startup_program) + + num_true = 121 + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.mnist.train(), buf_size=60000), + batch_size=num_true) + + for pass_id in range(NUM_PASS): + for batch_id, data in enumerate(train_reader()): + num_true = len(data) + n = numpy.random.uniform( + low=-1.0, high=1.0, + size=[num_true * NOISE_SIZE]).astype('float32').reshape( + [num_true, NOISE_SIZE, 1, 1]) + generated_img = exe.run(g_program, + feed={'noise': n}, + fetch_list={g_img})[0] + + real_data = numpy.array(map(lambda x: x[0], data)).astype('float32') + real_data = real_data.reshape(num_true, 1, 28, 28) + total_data = numpy.concatenate([real_data, generated_img]) + total_label = numpy.concatenate([ + numpy.ones( + shape=[real_data.shape[0], 1], dtype='float32'), + numpy.zeros( + shape=[real_data.shape[0], 1], dtype='float32') + ]) + d_loss_np = exe.run(d_program, + feed={'img': total_data, + 'label': total_label}, + fetch_list={d_loss})[0] + + n = numpy.random.uniform( + 
import math
import os

import matplotlib
# Use a non-interactive backend so the script runs headless (CI machines
# have no display); must be selected before importing pyplot.
matplotlib.use('Agg')
import matplotlib.gridspec as gridspec
import matplotlib.pyplot as plt
import numpy

import paddle.v2 as paddle
import paddle.v2.fluid as fluid


class Counter(object):
    """Monotonic counter that renders as a string.

    Every call (and every `str()` / `format()` use) returns the current
    count as a string and then increments it, so successive layers get
    distinct parameter names such as D_0, D_1, ...
    """

    def __init__(self):
        self.counter = 0

    def __call__(self, *args, **kwargs):
        tmp = self.counter
        self.counter += 1
        return str(tmp)

    def __str__(self):
        return self.__call__()


def D(x):
    """Discriminator: FC(200, relu) -> FC(1, linear) producing one logit.

    Parameter/bias names are prefixed with ``D_`` so the discriminator's
    parameters can be told apart from the generator's.  Note each
    ``'D_{0}'.format(c)`` advances the counter, so param_attr and
    bias_attr get different names.
    """
    c = Counter()
    hidden = fluid.layers.fc(input=x,
                             size=200,
                             act='relu',
                             param_attr='D_{0}'.format(c),
                             bias_attr='D_{0}'.format(c))
    logits = fluid.layers.fc(input=hidden,
                             size=1,
                             act=None,
                             param_attr='D_{0}'.format(c),
                             bias_attr='D_{0}'.format(c))
    return logits


def G(x):
    """Generator: FC(200, relu) -> FC(784, tanh), names prefixed ``G_``."""
    c = Counter()
    hidden = fluid.layers.fc(input=x,
                             size=200,
                             act='relu',
                             param_attr='G_{0}'.format(c),
                             bias_attr='G_{0}'.format(c))
    hidden = fluid.layers.fc(input=hidden,
                             size=784,
                             act='tanh',
                             param_attr='G_{0}'.format(c),
                             bias_attr='G_{0}'.format(c))
    return hidden


def plot(gen_data):
    """Arrange a batch of generated 28x28 images on a square grid figure.

    Args:
        gen_data: numpy array of shape (batch, 784); resized in place to
            (batch, 28, 28).

    Returns:
        The matplotlib Figure holding the grid (caller closes it).
    """
    gen_data.resize(gen_data.shape[0], 28, 28)
    n = int(math.ceil(math.sqrt(gen_data.shape[0])))
    fig = plt.figure(figsize=(n, n))
    gs = gridspec.GridSpec(n, n)
    gs.update(wspace=0.05, hspace=0.05)

    for i, sample in enumerate(gen_data):
        ax = plt.subplot(gs[i])
        plt.axis('off')
        ax.set_xticklabels([])
        ax.set_yticklabels([])
        ax.set_aspect('equal')
        plt.imshow(sample.reshape(28, 28), cmap='Greys_r')

    return fig


NOISE_SIZE = 100
NUM_PASS = 10


def main():
    """Train a simple GAN on MNIST with two programs sharing parameters.

    ``d_program`` trains the discriminator on real+fake images;
    ``dg_program`` trains the generator through the (frozen-by-
    parameter-list) discriminator.
    """
    startup_program = fluid.Program()
    d_program = fluid.Program()
    dg_program = fluid.Program()
    with fluid.program_guard(d_program, startup_program):
        img = fluid.layers.data(name='img', shape=[784], dtype='float32')
        logit = D(img)
        d_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logit,
            labels=fluid.layers.data(
                name='label', shape=[1], dtype='float32'))
        d_loss = fluid.layers.mean(x=d_loss)

    with fluid.program_guard(dg_program, startup_program):
        noise = fluid.layers.data(
            name='noise', shape=[NOISE_SIZE], dtype='float32')
        g_img = G(x=noise)
        # Clone before attaching D so g_program contains only the
        # generator (used for inference/plotting below).
        g_program = dg_program.clone()
        logit = D(g_img)
        dg_loss = fluid.layers.sigmoid_cross_entropy_with_logits(
            x=logit,
            labels=fluid.layers.fill_constant_batch_size_like(
                input=noise, dtype='float32', shape=[-1, 1], value=1.0))
        dg_loss = fluid.layers.mean(x=dg_loss)

    opt = fluid.optimizer.Adam(learning_rate=1e-5)

    opt.minimize(loss=d_loss, startup_program=startup_program)
    # Only the generator's parameters are updated by the DG step.
    opt.minimize(
        loss=dg_loss,
        startup_program=startup_program,
        parameter_list=[
            p.name for p in g_program.global_block().all_parameters()
        ])
    exe = fluid.Executor(fluid.CPUPlace())
    exe.run(startup_program)

    num_true = 121
    train_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.mnist.train(), buf_size=60000),
        batch_size=num_true)

    # BUG FIX: os.makedirs was called inside the loop and raised OSError
    # on the second pass; create the output directory once, up front.
    if not os.path.exists('./out/'):
        os.makedirs('./out/')

    for pass_id in range(NUM_PASS):
        for batch_id, data in enumerate(train_reader()):
            num_true = len(data)  # the last batch may be smaller
            n = numpy.random.uniform(
                low=-1.0, high=1.0,
                size=[num_true * NOISE_SIZE]).astype('float32').reshape(
                    [num_true, NOISE_SIZE])
            generated_img = exe.run(g_program,
                                    feed={'noise': n},
                                    fetch_list=[g_img])[0]

            # BUG FIX: list comprehension instead of Py2-only bare map().
            real_data = numpy.array([x[0] for x in data]).astype('float32')
            real_data = real_data.reshape(num_true, 784)
            total_data = numpy.concatenate([real_data, generated_img])
            # BUG FIX: the zero-label count now follows the generated
            # batch rather than reusing real_data.shape[0].
            total_label = numpy.concatenate([
                numpy.ones(
                    shape=[real_data.shape[0], 1], dtype='float32'),
                numpy.zeros(
                    shape=[generated_img.shape[0], 1], dtype='float32')
            ])
            d_loss_np = exe.run(d_program,
                                feed={'img': total_data,
                                      'label': total_label},
                                fetch_list=[d_loss])[0]

            # BUG FIX: the noise layer is declared with shape
            # [NOISE_SIZE], so the feed must be rank-2
            # [batch, NOISE_SIZE]; the old [2*num_true, NOISE_SIZE, 1, 1]
            # reshape did not match the data layer.
            n = numpy.random.uniform(
                low=-1.0, high=1.0,
                size=[2 * num_true * NOISE_SIZE]).astype('float32').reshape(
                    [2 * num_true, NOISE_SIZE])
            dg_loss_np = exe.run(dg_program,
                                 feed={'noise': n},
                                 fetch_list=[dg_loss])[0]
            print("Pass ID={0}, Batch ID={1}, D-Loss={2}, DG-Loss={3}".format(
                pass_id, batch_id, d_loss_np, dg_loss_np))
        fig = plot(generated_img)
        plt.savefig(
            'out/{0}.png'.format(str(pass_id).zfill(3)), bbox_inches='tight')
        plt.close(fig)


if __name__ == '__main__':
    main()
= layers.data( - name='y', shape=[1], dtype='float32', main_program=program) - cost = layers.square_error_cost( - input=y_predict, label=y, main_program=program) - - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - program.append_backward(avg_cost) - - print str(program) + print(str(program)) def test_recognize_digits_mlp(self): program = Program() - - # Change g_program, so the rest layers use `g_program` - images = layers.data( - name='pixel', shape=[784], dtype='float32', main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - hidden1 = layers.fc(input=images, - size=128, - act='relu', - main_program=program) - hidden2 = layers.fc(input=hidden1, - size=64, - act='relu', - main_program=program) - predict = layers.fc(input=hidden2, - size=10, - act='softmax', - main_program=program) - cost = layers.cross_entropy( - input=predict, label=label, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + # Change g_program, so the rest layers use `g_program` + images = layers.data(name='pixel', shape=[784], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + hidden1 = layers.fc(input=images, size=128, act='relu') + hidden2 = layers.fc(input=hidden1, size=64, act='relu') + predict = layers.fc(input=hidden2, size=10, act='softmax') + cost = layers.cross_entropy(input=predict, label=label) + avg_cost = layers.mean(x=cost) + self.assertIsNotNone(avg_cost) + + print(str(program)) def test_simple_conv2d(self): program = Program() - images = layers.data( - name='pixel', - shape=[3, 48, 48], - dtype='int32', - main_program=program) - layers.conv2d( - input=images, - num_filters=3, - filter_size=[4, 4], - main_program=program) + with program_guard(program, startup_program=Program()): + images = layers.data(name='pixel', 
shape=[3, 48, 48], dtype='int32') + layers.conv2d(input=images, num_filters=3, filter_size=[4, 4]) + print(str(program)) + + def test_conv2d_transpose(self): + program = Program() + kwargs = {'main_program': program} + img = layers.data( + name='pixel', shape=[3, 2, 2], dtype='float32', **kwargs) + layers.conv2d_transpose( + input=img, num_filters=10, output_size=28, **kwargs) print str(program) def test_conv2d_transpose(self): @@ -76,117 +63,90 @@ def test_conv2d_transpose(self): def test_recognize_digits_conv(self): program = Program() - - images = layers.data( - name='pixel', - shape=[1, 28, 28], - dtype='float32', - main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - conv_pool_1 = nets.simple_img_conv_pool( - input=images, - filter_size=5, - num_filters=2, - pool_size=2, - pool_stride=2, - act="relu", - main_program=program) - conv_pool_2 = nets.simple_img_conv_pool( - input=conv_pool_1, - filter_size=5, - num_filters=4, - pool_size=2, - pool_stride=2, - act="relu", - main_program=program) - - predict = layers.fc(input=conv_pool_2, - size=10, - act="softmax", - main_program=program) - cost = layers.cross_entropy( - input=predict, label=label, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - - program.append_backward(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + images = layers.data( + name='pixel', shape=[1, 28, 28], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + conv_pool_1 = nets.simple_img_conv_pool( + input=images, + filter_size=5, + num_filters=2, + pool_size=2, + pool_stride=2, + act="relu") + conv_pool_2 = nets.simple_img_conv_pool( + input=conv_pool_1, + filter_size=5, + num_filters=4, + pool_size=2, + pool_stride=2, + act="relu") + + predict = layers.fc(input=conv_pool_2, size=10, act="softmax") + cost = layers.cross_entropy(input=predict, label=label) + avg_cost = layers.mean(x=cost) 
+ + program.append_backward(avg_cost) + + print(str(program)) def test_word_embedding(self): program = Program() - dict_size = 10000 - embed_size = 32 - first_word = layers.data( - name='firstw', shape=[1], dtype='int64', main_program=program) - second_word = layers.data( - name='secondw', shape=[1], dtype='int64', main_program=program) - third_word = layers.data( - name='thirdw', shape=[1], dtype='int64', main_program=program) - forth_word = layers.data( - name='forthw', shape=[1], dtype='int64', main_program=program) - next_word = layers.data( - name='nextw', shape=[1], dtype='int64', main_program=program) - - embed_first = layers.embedding( - input=first_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr={'name': 'shared_w'}, - main_program=program) - embed_second = layers.embedding( - input=second_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr={'name': 'shared_w'}, - main_program=program) - - embed_third = layers.embedding( - input=third_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr={'name': 'shared_w'}, - main_program=program) - embed_forth = layers.embedding( - input=forth_word, - size=[dict_size, embed_size], - dtype='float32', - param_attr={'name': 'shared_w'}, - main_program=program) - - concat_embed = layers.concat( - input=[embed_first, embed_second, embed_third, embed_forth], - axis=1, - main_program=program) - - hidden1 = layers.fc(input=concat_embed, - size=256, - act='sigmoid', - main_program=program) - predict_word = layers.fc(input=hidden1, - size=dict_size, - act='softmax', - main_program=program) - cost = layers.cross_entropy( - input=predict_word, label=next_word, main_program=program) - avg_cost = layers.mean(x=cost, main_program=program) - self.assertIsNotNone(avg_cost) - - print str(program) + with program_guard(program, startup_program=Program()): + dict_size = 10000 + embed_size = 32 + first_word = layers.data(name='firstw', shape=[1], dtype='int64') + second_word = 
layers.data(name='secondw', shape=[1], dtype='int64') + third_word = layers.data(name='thirdw', shape=[1], dtype='int64') + forth_word = layers.data(name='forthw', shape=[1], dtype='int64') + next_word = layers.data(name='nextw', shape=[1], dtype='int64') + + embed_first = layers.embedding( + input=first_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_second = layers.embedding( + input=second_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + + embed_third = layers.embedding( + input=third_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + embed_forth = layers.embedding( + input=forth_word, + size=[dict_size, embed_size], + dtype='float32', + param_attr='shared_w') + + concat_embed = layers.concat( + input=[embed_first, embed_second, embed_third, embed_forth], + axis=1) + + hidden1 = layers.fc(input=concat_embed, size=256, act='sigmoid') + predict_word = layers.fc(input=hidden1, + size=dict_size, + act='softmax') + cost = layers.cross_entropy(input=predict_word, label=next_word) + avg_cost = layers.mean(x=cost) + self.assertIsNotNone(avg_cost) + + print(str(program)) def test_linear_chain_crf(self): program = Program() - - # Change g_program, so the rest layers use `g_program` - images = layers.data( - name='pixel', shape=[784], dtype='float32', main_program=program) - label = layers.data( - name='label', shape=[1], dtype='int32', main_program=program) - hidden = layers.fc(input=images, size=128, main_program=program) - crf = layers.linear_chain_crf( - input=hidden, label=label, main_program=program) - - print str(program) + with program_guard(program, startup_program=Program()): + images = layers.data(name='pixel', shape=[784], dtype='float32') + label = layers.data(name='label', shape=[1], dtype='int32') + hidden = layers.fc(input=images, size=128) + crf = layers.linear_chain_crf(input=hidden, label=label) + self.assertNotEqual(crf) + + print(str(program)) 
if __name__ == '__main__': diff --git a/python/paddle/v2/fluid/tests/test_recurrent_op.py b/python/paddle/v2/fluid/tests/test_recurrent_op.py index 84548847f76c63..36e0c84c0b8e7d 100644 --- a/python/paddle/v2/fluid/tests/test_recurrent_op.py +++ b/python/paddle/v2/fluid/tests/test_recurrent_op.py @@ -271,12 +271,12 @@ def create_rnn_op(self): temp_l = layers.fc(input=x_t, size=self.input_dim, - param_attr={'name': 'W'}, + param_attr='W', bias_attr=False, **self.p_info) temp_r = layers.fc(input=h_pre, size=self.input_dim, - param_attr={'name': 'U'}, + param_attr='U', bias_attr=False, **self.p_info)