Failed to implement normalization of the kerasa layer

Question

Failed to implement normalization of the kerasa layer

I am trying to implement level normalization in a fully connected neural network with keras. The problem that I encountered is that all losses NaNare not studied. Here is my code:

class DenseLN(Layer):
    def __init__(self, output_dim, init='glorot_uniform', activation='linear', weights=None,
                 W_regularizer=None, b_regularizer=None, activity_regularizer=None,
                 W_constraint=None, b_constraint=None, bias=True, input_dim=None, **kwargs):
        self.init = initializations.get(init)
        self.activation = activations.get(activation)
        self.output_dim = output_dim
        self.input_dim = input_dim
        self.epsilon = 1e-5        

        self.W_regularizer = regularizers.get(W_regularizer)
        self.b_regularizer = regularizers.get(b_regularizer)
        self.activity_regularizer = regularizers.get(activity_regularizer)

        self.W_constraint = constraints.get(W_constraint)
        self.b_constraint = constraints.get(b_constraint)

        self.bias = bias
        self.initial_weights = weights
        self.input_spec = [InputSpec(ndim=2)]

        if self.input_dim:
            kwargs['input_shape'] = (self.input_dim,)
        super(DenseLN, self).__init__(**kwargs)

    def ln(self, x):
        # layer normalization function
        m = K.mean(x, axis=0)
        std = K.sqrt(K.var(x, axis=0) + self.epsilon)
        x_normed = (x - m) / (std + self.epsilon)
        x_normed = self.gamma * x_normed + self.beta
        return x_normed

    def build(self, input_shape):
        assert len(input_shape) == 2
        input_dim = input_shape[1]
        self.input_spec = [InputSpec(dtype=K.floatx(),
                                     shape=(None, input_dim))]

        self.gamma = K.variable(np.ones(self.output_dim) * 0.2, name='{}_gamma'.format(self.name)) 
        self.beta = K.zeros((self.output_dim,), name='{}_beta'.format(self.name))

        self.W = self.init((input_dim, self.output_dim),
                           name='{}_W'.format(self.name))
        if self.bias:
            self.b = K.zeros((self.output_dim,),
                             name='{}_b'.format(self.name))
            self.trainable_weights = [self.W, self.gamma, self.beta, self.b]
        else:
            self.trainable_weights = [self.W, self.gamma, self.beta]
        self.regularizers = []
        if self.W_regularizer:
            self.W_regularizer.set_param(self.W)
            self.regularizers.append(self.W_regularizer)

        if self.bias and self.b_regularizer:
            self.b_regularizer.set_param(self.b)
            self.regularizers.append(self.b_regularizer)

        if self.activity_regularizer:
            self.activity_regularizer.set_layer(self)
            self.regularizers.append(self.activity_regularizer)

        self.constraints = {}
        if self.W_constraint:
            self.constraints[self.W] = self.W_constraint
        if self.bias and self.b_constraint:
            self.constraints[self.b] = self.b_constraint

        if self.initial_weights is not None:
            self.set_weights(self.initial_weights)
            del self.initial_weights

    def call(self, x, mask=None):
        output = K.dot(x, self.W)
        output = self.ln(output)
        #print (theano.tensor.shape(output))
        if self.bias:
            output += self.b
        return self.activation(output)

    def get_output_shape_for(self, input_shape):
        assert input_shape and len(input_shape) == 2
        return (input_shape[0], self.output_dim)

model = Sequential()
model.add(Dense(12, activation='sigmoid', input_dim=12))
model.add(DenseLN(98, activation='sigmoid'))
model.add(DenseLN(108, activation='sigmoid'))
model.add(DenseLN(1))
adadelta = Adadelta(lr=0.1, rho=0.95, epsilon=1e-08)
adagrad = Adagrad(lr=0.003, epsilon=1e-08)

model.compile(loss='poisson',
              optimizer=adagrad,
              metrics=['accuracy'])

model.fit(X_train_scale,
          Y_train,
          batch_size=3000,
          callbacks=[history],
          nb_epoch=300)

Do you know what is wrong here, and how can I fix it? Thanks in advance!

EDIT:

- . Dense, , . DenseLN, , 0.6+ , . , Dense, - ln self.ln(output) call. , gamma beta trainable_weights.

!

+4

python neural-network keras

user5779223 23 . '16 7:45

1

Alexander Measure · Answer 1 · 2017-07-24T19:59:41+0000

, . - :

class LayerNorm(Layer):
    """ Layer Normalization in the style of https://arxiv.org/abs/1607.06450 """
    def __init__(self, scale_initializer='ones', bias_initializer='zeros', **kwargs):
        super(LayerNorm, self).__init__(**kwargs)
        self.epsilon = 1e-6
        self.scale_initializer = initializers.get(scale_initializer)
        self.bias_initializer = initializers.get(bias_initializer)

    def build(self, input_shape):
        self.scale = self.add_weight(shape=(input_shape[-1],), 
                                     initializer=self.scale_initializer,
                                     trainable=True,
                                     name='{}_scale'.format(self.name))
        self.bias = self.add_weight(shape=(input_shape[-1],),
                                    initializer=self.bias_initializer,
                                    trainable=True,
                                    name='{}_bias'.format(self.name))
        self.built = True

    def call(self, x, mask=None):
        mean = K.mean(x, axis=-1, keepdims=True)
        std = K.std(x, axis=-1, keepdims=True)
        norm = (x - mean) * (1/(std + self.epsilon))
        return norm * self.scale + self.bias

    def compute_output_shape(self, input_shape):
        return input_shape

Failed to implement normalization of the kerasa layer

More articles: