Commit 7af363d

revert 12103

Author: chenjiawen
1 parent: aad57e7

2 files changed: 64 additions and 72 deletions

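Both diffs below return `_finish_update` to taking only the block and handing back the finish ops it appends (or None), and move the Adam/Adamax beta power accumulators back to single global variables instead of per-parameter accumulators. A minimal, runnable sketch of that hook contract, with hypothetical stand-in classes rather than the real `paddle.fluid.optimizer` module:

# Simplified sketch of the _finish_update contract restored by this commit.
# FakeBlock, BaseOptimizer and AdamLike are illustrative stand-ins only.

class FakeBlock:
    """Stand-in for framework.Block: just records appended op types."""
    def __init__(self):
        self.ops = []

    def append_op(self, type, **kwargs):
        self.ops.append(type)
        return type

class BaseOptimizer:
    def _finish_update(self, block):
        """Finish any custom updates needed before completing an
        optimization step.

        Returns:
            list of finish ops or None
        """
        return None  # base class has nothing to finish

class AdamLike(BaseOptimizer):
    def _finish_update(self, block):
        # mirrors AdamOptimizer._finish_update below: append one scale op
        # per beta power accumulator and hand the ops back to the caller
        scale_beta1 = block.append_op(type="scale")
        scale_beta2 = block.append_op(type="scale")
        return [scale_beta1, scale_beta2]

block = FakeBlock()
assert BaseOptimizer()._finish_update(block) is None
assert AdamLike()._finish_update(block) == ["scale", "scale"]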

python/paddle/fluid/optimizer.py

Lines changed: 62 additions & 70 deletions
@@ -123,7 +123,7 @@ def _create_accumulators(self, block, parameters):
         """
         pass

-    def _finish_update(self, block, parameters_and_grads):
+    def _finish_update(self, block):
         """Finish any custom updates needed
            before completing an optimization step

@@ -132,7 +132,7 @@ def _finish_update(self, block, parameters_and_grads):
             parameters: list of parameter variables for the optimizer

         Returns:
-            None
+            list of finish ops or None
         """
         pass

@@ -237,7 +237,7 @@ def _create_optimization_pass(self,

         # Get custom finish ops for subclasses
         # FIXME: Need to fix this once we figure out how to handle dependencies
-        self._finish_update(loss.block, parameters_and_grads)
+        self._finish_update(loss.block)

         end = len(global_block.ops)
         return global_block.slice_ops(start, end)
@@ -487,8 +487,6 @@ class AdamOptimizer(Optimizer):
     """
     _moment1_acc_str = "moment1"
     _moment2_acc_str = "moment2"
-    _beta1_pow_acc_str = "beta1_pow_acc"
-    _beta2_pow_acc_str = "beta2_pow_acc"

     def __init__(self,
                  learning_rate=0.001,
@@ -510,22 +508,32 @@ def __init__(self,
     def _create_accumulators(self, block, parameters):
         assert isinstance(block, framework.Block)

+        main_block = block.program.global_block()
+        # Create beta1 and beta2 power tensors
+        beta_shape = [1]
+        self._beta1_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta1_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+        self.helper.set_variable_initializer(
+            self._beta1_pow_acc, initializer=Constant(self._beta1))
+
+        self._beta2_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta2_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+
+        self.helper.set_variable_initializer(
+            self._beta2_pow_acc, initializer=Constant(self._beta2))
+
         # Create accumulator tensors for first and second moments
         for p in parameters:
             self._add_accumulator(self._moment1_acc_str, p)
             self._add_accumulator(self._moment2_acc_str, p)
-            self._add_accumulator(
-                name=self._beta1_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta1,
-                shape=[1])
-            self._add_accumulator(
-                name=self._beta2_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta2,
-                shape=[1])

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)
@@ -534,11 +542,6 @@ def _append_optimize_op(self, block, param_and_grad):
                                          param_and_grad[0])
         moment2 = self._get_accumulator(self._moment2_acc_str,
                                         param_and_grad[0])
-        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                              param_and_grad[0])
-        beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
-                                              param_and_grad[0])
-
         # create the adam optimize op
         adam_op = block.append_op(
             type=self.type,
@@ -548,8 +551,8 @@ def _append_optimize_op(self, block, param_and_grad):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment1": moment1,
                 "Moment2": moment2,
-                "Beta1Pow": beta1_pow_acc,
-                "Beta2Pow": beta2_pow_acc
+                "Beta1Pow": self._beta1_pow_acc,
+                "Beta2Pow": self._beta2_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -564,30 +567,24 @@ def _append_optimize_op(self, block, param_and_grad):

         return adam_op

-    def _finish_update(self, block, param_and_grads):
+    def _finish_update(self, block):
         """Update Beta1 and Beta2 Power accumulators
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        for param, grad in param_and_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                                      param)
-                beta2_pow_acc = self._get_accumulator(self._beta2_pow_acc_str,
-                                                      param)
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta1_pow_acc},
-                    outputs={"Out": beta1_pow_acc},
-                    attrs={"scale": self._beta1})
-
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta2_pow_acc},
-                    outputs={"Out": beta2_pow_acc},
-                    attrs={"scale": self._beta2})
+        scale_beta1 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        scale_beta2 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta2_pow_acc},
+            outputs={"Out": self._beta2_pow_acc},
+            attrs={"scale": self._beta2})
+
+        return [scale_beta1, scale_beta2]


 class AdamaxOptimizer(Optimizer):
@@ -630,7 +627,6 @@ class AdamaxOptimizer(Optimizer):
     """
     _moment_acc_str = "moment"
     _inf_norm_acc_str = "inf_norm"
-    _beta1_pow_acc_str = "beta1_pow_acc"

     def __init__(self,
                  learning_rate=0.001,
@@ -650,25 +646,28 @@ def __init__(self,
         self._epsilon = epsilon

     def _create_accumulators(self, block, parameters):
+        # Create beta1 power accumulator tensor
+        beta_shape = [1]
+        self._beta1_pow_acc = self.helper.create_global_variable(
+            name=unique_name.generate('beta1_pow_acc'),
+            dtype='float32' if self._dtype == None else self._dtype,
+            shape=beta_shape,
+            lod_level=0,
+            persistable=True)
+        self.helper.set_variable_initializer(
+            self._beta1_pow_acc, initializer=Constant(self._beta1))
+
         # Create accumulator tensors for first moment and infinity norm
         for p in parameters:
             self._add_accumulator(self._moment_acc_str, p)
             self._add_accumulator(self._inf_norm_acc_str, p)
-            self._add_accumulator(
-                name=self._beta1_pow_acc_str,
-                param=p,
-                dtype='float32',
-                fill_value=self._beta1,
-                shape=[1])

     def _append_optimize_op(self, block, param_and_grad):
         assert isinstance(block, framework.Block)

         moment = self._get_accumulator(self._moment_acc_str, param_and_grad[0])
         inf_norm = self._get_accumulator(self._inf_norm_acc_str,
                                          param_and_grad[0])
-        beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                              param_and_grad[0])
         # create the adamax optimize op
         adamax_op = block.append_op(
             type=self.type,
@@ -678,7 +677,7 @@ def _append_optimize_op(self, block, param_and_grad):
                 "LearningRate": self._create_param_lr(param_and_grad),
                 "Moment": moment,
                 "InfNorm": inf_norm,
-                "Beta1Pow": beta1_pow_acc
+                "Beta1Pow": self._beta1_pow_acc
             },
             outputs={
                 "ParamOut": param_and_grad[0],
@@ -693,22 +692,18 @@ def _append_optimize_op(self, block, param_and_grad):

         return adamax_op

-    def _finish_update(self, block, parameters_and_grads):
+    def _finish_update(self, block):
         """Update Beta1 Power accumulator
         """
         assert isinstance(block, framework.Block)
         main_block = block.program.global_block()
-        for param, grad in parameters_and_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                beta1_pow_acc = self._get_accumulator(self._beta1_pow_acc_str,
-                                                      param)
-                main_block.append_op(
-                    type="scale",
-                    inputs={"X": beta1_pow_acc},
-                    outputs={"Out": beta1_pow_acc},
-                    attrs={"scale": self._beta1})
+        scale_beta1 = main_block.append_op(
+            type="scale",
+            inputs={"X": self._beta1_pow_acc},
+            outputs={"Out": self._beta1_pow_acc},
+            attrs={"scale": self._beta1})
+
+        return [scale_beta1]


 class DecayedAdagradOptimizer(Optimizer):
@@ -1162,10 +1157,7 @@ def __init__(self,
             self.params_grads.append((param, grad))

         for param, grad in self.params_grads:
-            if grad is None:
-                continue
-            with param.block.program.optimized_guard([param, grad]):
-                self._append_average_accumulate_op(param)
+            self._append_average_accumulate_op(param)

         self.apply_program = Program()
         block = self.apply_program.global_block()
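For reference, the beta power variables recreated above exist for Adam's bias correction: the accumulators start at beta1 / beta2 via the Constant initializer, and each "scale" op appended by `_finish_update` multiplies in one more factor, so at step t they hold beta1**t and beta2**t. A plain-Python sketch of that bookkeeping, with illustrative constants rather than the Fluid API:

# Plain-Python sketch (not the Fluid API) of what the single beta power
# accumulators hold across optimization steps.
import math

beta1, beta2 = 0.9, 0.999
beta1_pow, beta2_pow = beta1, beta2  # values set by the Constant initializer

for step in range(1, 4):
    # at step t the adam op reads beta1**t and beta2**t for bias correction:
    # m_hat = m / (1 - beta1**t), v_hat = v / (1 - beta2**t)
    assert math.isclose(beta1_pow, beta1 ** step)
    assert math.isclose(beta2_pow, beta2 ** step)
    # the scale ops appended by _finish_update then advance the powers,
    # i.e. scale(X=beta1_pow_acc, Out=beta1_pow_acc, scale=beta1)
    beta1_pow *= beta1
    beta2_pow *= beta2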

python/paddle/fluid/tests/unittests/test_optimizer.py

Lines changed: 2 additions & 2 deletions
@@ -287,7 +287,7 @@ def test_adam_optimizer(self):

         # Check accumulators
         accumulators = adam_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 4)
+        self.assertEqual(len(accumulators), 2)
         self.assertTrue(adam_optimizer.get_moment1_str() in accumulators)
         self.assertTrue(adam_optimizer.get_moment2_str() in accumulators)
         moment1_acc = accumulators[adam_optimizer.get_moment1_str()]
@@ -354,7 +354,7 @@ def test_adamax_optimizer(self):

         # Check accumulators
         accumulators = adamax_optimizer.get_accumulators()
-        self.assertEqual(len(accumulators), 3)
+        self.assertEqual(len(accumulators), 2)
         self.assertTrue(adamax_optimizer.get_moment_str() in accumulators)
         self.assertTrue(adamax_optimizer.get_inf_norm_str() in accumulators)
         moment_acc = accumulators[adamax_optimizer.get_moment_str()]
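The two expected counts above drop because `get_accumulators()` only reports what was registered through `_add_accumulator`, and after this revert the beta power tensors are plain global variables rather than per-parameter accumulators. A toy illustration, with plain dicts standing in for the accumulator bookkeeping:

# Illustrative sketch only (not the Fluid test harness): accumulators are
# keyed by accumulator name, with one entry per parameter underneath.
adam_accumulators = {
    "moment1": {"param_w": "moment1_var"},
    "moment2": {"param_w": "moment2_var"},
}
adamax_accumulators = {
    "moment": {"param_w": "moment_var"},
    "inf_norm": {"param_w": "inf_norm_var"},
}

assert len(adam_accumulators) == 2    # previously 4, with beta1_pow_acc and beta2_pow_acc included
assert len(adamax_accumulators) == 2  # previously 3, with beta1_pow_acc included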
