Commit b169af47 authored by jean Ibarz

Added two layers to apply random scaling or random shift for tensors of standardized shape BWHF (Batch, Width, Height, Features).
When training is False, the layers do not apply random scaling or shifting. However, the random shift layer still applies right zero padding, to ensure that the shape of the layer output is identical during training and testing.
parent 2bd6090c
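As a quick illustration of that shape guarantee, here is a minimal sketch (not part of the commit, and assuming the layers are imported from core.layers as the tests below do): calling the shift layer with and without training yields the same output shape, because the right zero padding of maxval is always applied.

import tensorflow as tf
from core.layers import RandomShift2DLayer

x = tf.random.normal(shape=(4, 16, 2, 1))  # BWHF input
shift_layer = RandomShift2DLayer(minval=0, maxval=3, axis=1)
y_train = shift_layer(x, training=True)   # padded by maxval, then randomly rolled
y_infer = shift_layer(x, training=False)  # padded by maxval only
# the W axis is always extended by maxval, so training and inference shapes match
assert y_train.shape == y_infer.shape == (4, 19, 2, 1)
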
import tensorflow as tf


class RandomShift2DLayer(tf.keras.layers.Layer):
    """
    A layer that, during training only, randomly shifts the input values along the given axis by an integer
    offset drawn uniformly in [minval, maxval). The input is always right zero padded by maxval along that
    axis, so that the output shape is identical during training and inference.
    """

    def __init__(self, minval, maxval, axis):
        super(RandomShift2DLayer, self).__init__()
        assert minval >= 0
        assert maxval >= 0
        assert maxval >= minval
        self.minval = minval
        self.maxval = maxval
        self.axis = axis

    def call(self, input, training=None):
        # always right zero pad along the axis to ensure that no part of the signal is lost after shifting
        paddings = [[0, 0] for _ in range(len(input.shape))]
        paddings[self.axis] = [0, self.maxval]  # right zero padding
        right_zero_padded_input = tf.pad(tensor=input, paddings=paddings)
        if not training:
            return right_zero_padded_input
        else:
            if self.minval == self.maxval:
                # degenerate range: apply the same fixed shift on every call
                shift = [self.minval]
            else:
                # note: maxval is exclusive in tf.random.uniform, so the drawn shift lies in [minval, maxval)
                shift = tf.random.uniform(shape=(1,), minval=self.minval, maxval=self.maxval, dtype=tf.dtypes.int32)
            shifted_input = tf.roll(input=right_zero_padded_input, shift=shift, axis=[self.axis])
            return shifted_input


class RandomScale2DLayer(tf.keras.layers.Layer):
    """
    A layer that randomly scales the input values along the 0th (batch) axis during the training phase, but
    not otherwise, by multiplication with a uniform random variable drawn in [minval, maxval).

    input: a tensor of shape BWHF (Batch, Width, Height, Features). In case of binaural sound, these dimensions
    correspond to (Batch, Time, Channels, 1).
    output: a tensor of the same shape, where each element of the Batch dimension is scaled by a coefficient
    drawn uniformly in [minval, maxval).
    """

    def __init__(self, minval=0.1, maxval=10.0):
        super(RandomScale2DLayer, self).__init__()
        assert minval <= maxval
        self.minval = minval
        self.maxval = maxval

    def call(self, input, training=None):
        if not training:
            return input
        else:
            # use the dynamic batch size so the layer also works when the batch dimension is not known statically
            batch_size = tf.shape(input)[0]
            scale_values = tf.random.uniform(shape=tf.stack([batch_size, 1, 1, 1]), minval=self.minval,
                                             maxval=self.maxval, dtype=tf.dtypes.float32)
            return tf.math.multiply(x=scale_values, y=input)
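
For context, here is a sketch of how these layers might be placed in front of a small model as training-time augmentation; the surrounding architecture, input shape, and parameter values are illustrative assumptions, not part of this commit.

import tensorflow as tf

# hypothetical BWHF input: (Batch, Time, Channels, 1), e.g. short binaural frames
inputs = tf.keras.Input(shape=(128, 2, 1))
x = RandomScale2DLayer(minval=0.5, maxval=2.0)(inputs)  # random per-example gain during training
x = RandomShift2DLayer(minval=0, maxval=8, axis=1)(x)   # random time shift during training
x = tf.keras.layers.Conv2D(filters=8, kernel_size=(3, 1), activation='relu')(x)
x = tf.keras.layers.GlobalAveragePooling2D()(x)
outputs = tf.keras.layers.Dense(2)(x)
model = tf.keras.Model(inputs=inputs, outputs=outputs)
model.summary()

During model.fit the layers receive training=True and apply the random scaling and shifting; during evaluation or prediction they receive training=False, so only the fixed right zero padding of the shift layer affects the output shape.
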
import numpy as np

from core.layers import RandomShift2DLayer, RandomScale2DLayer


def test_random_shift_2d_layer():
    # create an input of shape BWHF = (10, 3, 2, 1)
    input = np.array([[1, 7], [2, 8], [3, 9]], dtype=np.float32)
    input = np.expand_dims(input, axis=0)
    input = np.repeat(a=input, repeats=10, axis=0)
    input = np.expand_dims(input, axis=-1)
    assert input.shape == (10, 3, 2, 1)
    # apply a deterministic shift of N values, i.e. a random shift in [N, N]
    test_shifts = [0, 1, 5]
    for test_shift in test_shifts:
        # check that the shift works for each possible axis
        for axis in range(3):
            layer = RandomShift2DLayer(minval=test_shift, maxval=test_shift, axis=axis)
            np_output = layer(input, training=True).numpy()
            # check the output shape
            expected_shape = list(input.shape)
            expected_shape[axis] += test_shift
            expected_shape = tuple(expected_shape)
            assert np_output.shape == expected_shape
            # check one shift case...
            if test_shift == 1 and axis == 0:
                assert np.allclose(np_output[0, :, :, 0], [[0, 0], [0, 0], [0, 0]])
                assert np.allclose(np_output[1, :, :, 0], [[1, 7], [2, 8], [3, 9]])
                assert np.allclose(np_output[-2, :, :, 0], [[1, 7], [2, 8], [3, 9]])
                assert np.allclose(np_output[-1, :, :, 0], [[1, 7], [2, 8], [3, 9]])
            # maybe should check more cases...
            # check that no shift is done when training is False
            layer = RandomShift2DLayer(minval=test_shift, maxval=test_shift, axis=axis)
            np_output = layer(input, training=False).numpy()
            # check the output shape: right zero padding must occur, even when training is False,
            # to ensure shape consistency between training and testing
            expected_shape = list(input.shape)
            expected_shape[axis] += test_shift
            expected_shape = tuple(expected_shape)
            assert np_output.shape == expected_shape
            # check one shift case...
            if test_shift == 1 and axis == 0:
                assert np.allclose(np_output[0, :, :, 0], [[1, 7], [2, 8], [3, 9]])
                assert np.allclose(np_output[1, :, :, 0], [[1, 7], [2, 8], [3, 9]])
                assert np.allclose(np_output[-2, :, :, 0], [[1, 7], [2, 8], [3, 9]])
                assert np.allclose(np_output[-1, :, :, 0], [[0, 0], [0, 0], [0, 0]])


def test_random_scale_2d_layer():
    # create an input of shape BWHF = (10, 3, 2, 1)
    input = np.array([[9, 99], [10, 100], [11, 101]], dtype=np.float32)
    input = np.expand_dims(input, axis=0)
    input = np.repeat(a=input, repeats=10, axis=0)
    input = np.expand_dims(input, axis=-1)
    assert input.shape == (10, 3, 2, 1)
    test_ranges = [[5, 15], [-10, 2]]
    for test_range in test_ranges:
        minval, maxval = test_range
        layer = RandomScale2DLayer(minval=minval, maxval=maxval)
        m0 = 0
        m1 = 0
        rtol = 0.1  # tolerated relative error
        n_iters = 100
        for _ in range(n_iters):
            np_output = layer(input, training=True).numpy() / n_iters
            assert np_output.shape == input.shape
            m0 += np.mean(np_output[:, :, 0, :])
            m1 += np.mean(np_output[:, :, 1, :])
        # check that, on average, the values of each channel are scaled appropriately
        assert np.isclose(a=m0, b=10 * np.mean(test_range), rtol=rtol)
        assert np.isclose(a=m1, b=100 * np.mean(test_range), rtol=rtol)
        # when not training, the layer should return the input unchanged, without random scaling
        np_output = layer(input, training=False)
        np_output = np_output.numpy()
        assert np.allclose(a=input, b=np_output)