-
Notifications
You must be signed in to change notification settings - Fork 5.9k
Closed
Description
`test_with_place` declares its layout parameter as `data_layout` (line 281), but the call to `_reference_grad` on line 307 passes `data_format` instead. As a result, only the NHWC format is actually tested.
Paddle/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
Lines 281 to 307 in b26f505
| def test_with_place(place, data_layout, shape): | |
| # attr | |
| epsilon = 0.00001 | |
| momentum = 0.9 | |
| if data_layout == "NCHW": | |
| n, c, h, w = shape[0], shape[1], shape[2], shape[3] | |
| else: | |
| n, h, w, c = shape[0], shape[1], shape[2], shape[3] | |
| scale_shape = [c] | |
| np.random.seed(123) | |
| x = np.random.random_sample(shape).astype(np.float32) | |
| scale = np.random.random_sample(scale_shape).astype(np.float32) | |
| bias = np.random.random_sample(scale_shape).astype(np.float32) | |
| mean = np.zeros(scale_shape).astype(np.float32) | |
| variance = np.ones(scale_shape).astype(np.float32) | |
| # run forward | |
| y, saved_mean, var_ref = _reference_training(x, scale, bias, | |
| epsilon, data_layout) | |
| mean_out = saved_mean * (1. - momentum) + momentum * mean | |
| variance_out = var_ref * (1. - momentum) + momentum * variance | |
| saved_variance = 1. / np.sqrt(var_ref + epsilon) | |
| # run backward | |
| y_grad = np.random.random_sample(shape).astype(np.float32) | |
| x_grad, scale_grad, bias_grad = _reference_grad( | |
| x, y_grad, scale, saved_mean, var_ref, epsilon, data_format) |
The reason is that `_reference_grad` doesn't check the value of `data_format`:
Paddle/python/paddle/fluid/tests/unittests/test_batch_norm_op.py
Lines 91 to 121 in b26f505
| def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format): | |
| # Use the following formulas to calculate gradients: | |
| # grad_scale = | |
| # sum(grad_y * (x - mean)) * rsqrt(var + epsilon) | |
| # | |
| # grad_offset = sum(output_y) | |
| # | |
| # x_grad = | |
| # 1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) - | |
| # (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon)) | |
| # transfer from (N, C, H, W) to (N, H, W, C) to simplify computation | |
| if data_format == "NCHW": | |
| x = np.transpose(x, (0, 2, 3, 1)) | |
| y_grad = np.transpose(y_grad, (0, 2, 3, 1)) | |
| x_grad = scale * (y_grad - np.mean( | |
| y_grad, axis=(0, 1, 2)) - (x - mean) * np.mean( | |
| y_grad * (x - mean), axis=(0, 1, 2)) / | |
| (var + epsilon)) / np.sqrt(var + epsilon) | |
| grad_scale = np.sum(y_grad * (x - mean) / np.sqrt(var + epsilon), | |
| axis=(0, 1, 2)) | |
| grad_offset = np.sum(y_grad, axis=(0, 1, 2)) | |
| # transfer back to N, C, H, W | |
| if data_format == "NCHW": | |
| x_grad = np.transpose(x_grad, (0, 3, 1, 2)) | |
| x = np.transpose(x, (0, 3, 1, 2)) | |
| y_grad = np.transpose(y_grad, (0, 3, 1, 2)) | |
| return x_grad, grad_scale, grad_offset |
Metadata
Metadata
Assignees
Labels
No labels