Skip to content

Bug in test_batch_norm_op.py #10093

@luotao1

Description

@luotao1

In line 281, the parameter is named `data_layout`, but in line 307 the function passes `data_format` to `_reference_grad`. As a result, only the NHWC format is actually tested.

# Excerpt of a test helper: builds random batch-norm inputs for the given
# layout and computes NumPy reference results for the forward and backward
# passes. `place` is not used in the lines shown here.
# NOTE(review): the snippet appears to be truncated (nothing is returned or
# asserted in the visible lines) and its indentation was lost when quoted.
def test_with_place(place, data_layout, shape):
# Operator attributes: numerical-stability term and running-average momentum.
epsilon = 0.00001
momentum = 0.9
# Unpack the 4 entries of `shape` according to the layout; anything other
# than "NCHW" is read as (N, H, W, C).
if data_layout == "NCHW":
n, c, h, w = shape[0], shape[1], shape[2], shape[3]
else:
n, h, w, c = shape[0], shape[1], shape[2], shape[3]
# scale/bias/mean/variance all hold one value per channel.
scale_shape = [c]
# Fixed seed so the random inputs are reproducible between runs.
np.random.seed(123)
x = np.random.random_sample(shape).astype(np.float32)
scale = np.random.random_sample(scale_shape).astype(np.float32)
bias = np.random.random_sample(scale_shape).astype(np.float32)
# Starting statistics: zero mean, unit variance (presumably the initial
# running mean/variance fed to the op — confirm against the full test).
mean = np.zeros(scale_shape).astype(np.float32)
variance = np.ones(scale_shape).astype(np.float32)
# Run the reference forward pass (_reference_training is defined elsewhere,
# not shown here) using the layout this function was called with.
y, saved_mean, var_ref = _reference_training(x, scale, bias,
epsilon, data_layout)
# Blend the batch statistics into the starting statistics with `momentum`.
mean_out = saved_mean * (1. - momentum) + momentum * mean
variance_out = var_ref * (1. - momentum) + momentum * variance
# Despite its name, this is the inverse standard deviation
# 1 / sqrt(var + epsilon), not a variance.
saved_variance = 1. / np.sqrt(var_ref + epsilon)
# Run the reference backward pass on a random upstream gradient.
y_grad = np.random.random_sample(shape).astype(np.float32)
# NOTE(review): `data_format` is neither a parameter nor a local of this
# function, so it resolves to whatever an enclosing scope defines (if
# anything). `data_layout` is presumably what was intended — confirm.
x_grad, scale_grad, bias_grad = _reference_grad(
x, y_grad, scale, saved_mean, var_ref, epsilon, data_format)

The reason is that `_reference_grad` doesn't validate the value of `data_format`; anything other than "NCHW" is silently treated as NHWC:

def _reference_grad(x, y_grad, scale, mean, var, epsilon, data_format):
    """Compute reference batch-norm gradients with NumPy.

    The gradients follow these formulas, where every sum/mean runs over all
    axes except the channel axis and N is the number of reduced elements:

        grad_scale  = sum(grad_y * (x - mean)) * rsqrt(var + epsilon)
        grad_offset = sum(grad_y)
        x_grad      = 1/N * scale * rsqrt(var + epsilon) * (
                          N * grad_y - sum(grad_y)
                          - (x - mean) * sum(grad_y * (x - mean))
                            / (var + epsilon))

    Args:
        x: 4-D input array laid out as described by ``data_format``.
        y_grad: Upstream gradient, same shape and layout as ``x``.
        scale: Per-channel scale, broadcastable against the channel axis.
        mean: Per-channel mean used in the forward pass.
        var: Per-channel variance used in the forward pass.
        epsilon: Small constant added to ``var`` for numerical stability.
        data_format: Either ``"NCHW"`` or ``"NHWC"``.

    Returns:
        A tuple ``(x_grad, grad_scale, grad_offset)``. ``x_grad`` has the
        same layout as ``x``; the other two hold one value per channel.

    Raises:
        ValueError: If ``data_format`` is neither ``"NCHW"`` nor ``"NHWC"``.
    """
    # Reject unknown layouts up front. Previously any value other than
    # "NCHW" silently took the NHWC path, which let a wrong argument at the
    # call site go unnoticed.
    if data_format not in ("NCHW", "NHWC"):
        raise ValueError(
            "Unknown data_format %r; expected 'NCHW' or 'NHWC'." %
            (data_format, ))

    channels_first = data_format == "NCHW"

    # Move channels last, (N, C, H, W) -> (N, H, W, C), so the per-channel
    # parameters broadcast along the trailing axis and every reduction is
    # simply over axes (0, 1, 2).
    if channels_first:
        x = np.transpose(x, (0, 2, 3, 1))
        y_grad = np.transpose(y_grad, (0, 2, 3, 1))

    reduce_axes = (0, 1, 2)
    x_centered = x - mean
    std = np.sqrt(var + epsilon)

    x_grad = scale * (
        y_grad - np.mean(y_grad, axis=reduce_axes) -
        x_centered * np.mean(y_grad * x_centered, axis=reduce_axes) /
        (var + epsilon)) / std
    grad_scale = np.sum(y_grad * x_centered / std, axis=reduce_axes)
    grad_offset = np.sum(y_grad, axis=reduce_axes)

    # Only x_grad carries the spatial layout; return it in the caller's
    # layout. (x and y_grad are locals and need no transposing back.)
    if channels_first:
        x_grad = np.transpose(x_grad, (0, 3, 1, 2))

    return x_grad, grad_scale, grad_offset

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Type

    No type

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions