I am getting the following error from the code snippet below using Keras/Tensorflow 2.0: Dimensions must be equal, but are 3 and 12 for 'model_7/tf_op_layer_add_39/add_39' (op: 'AddV2') with input shapes: [12,3], [12]. It appears that problem is localized to the last stage, but I am not sure. I also get an interesting warning after the vae summary: WARNING:tensorflow:Output decoder missing from loss dictionary. We assume this was done on purpose. The fit and evaluate APIs will not be expecting any data to be passed to decoder.
Model: "model_7". Any help is most appreciated.
from tensorflow.keras.layers import Dense, Input, LSTM
from tensorflow.keras.layers import Conv2D, Flatten, Lambda
from tensorflow.keras.layers import Reshape, Conv2DTranspose
from tensorflow.keras.layers import TimeDistributed
from tensorflow.keras.layers import RepeatVector
from tensorflow.keras.models import Model
from tensorflow.keras.losses import mse, binary_crossentropy
from tensorflow.keras import backend as K
import numpy as np
import argparse
"""
From TOY generate temporal sequence
"""
def temporalize(X, y, lookback):
output_X = []
output_y = []
for i in range(len(X)-lookback-1):
t = []
for j in range(1,lookback+1):
# Gather past records upto the lookback period
t.append(X[[(i+j+1)], :])
output_X.append(t)
output_y.append(y[i+lookback+1])
return output_X, output_y
# reparameterization trick
# instead of sampling from Q(z|X), sample eps = N(0,I)
# z = z_mean + sqrt(var)*eps
def sampling(args):
z_mean, z_log_var = args
batch = K.shape(z_mean)[0]
# K is the keras backend
dim = K.int_shape(z_mean)[1]
# by default, random_normal has mean=0 and std=1.0
epsilon = K.random_normal(shape=(batch, dim))
return z_mean + K.exp(0.5 * z_log_var) * epsilon
"""
TOY example
"""
# define input timeseries
timeseries = np.zeros((100,2))
timeseries[:,0]= np.linspace(.1, 1, 100)
timeseries[:,1]= np.linspace(.1, 1, 100)**3
timesteps = timeseries.shape[0]
n_features = timeseries.shape[1]
timesteps = 3
x_train, y_train = temporalize(timeseries, np.zeros(len(timeseries)), timesteps)
n_features = 2
x_train = np.array(x_train)
x_train = x_train.reshape(x_train.shape[0], timesteps, n_features)
# network parameters
input_shape = (timesteps, n_features,)
first_dim = 128
intermediate_dim = 64
decoder_input_shape = (timesteps,n_features,)
batch_size = 128
latent_dim = 2
epochs = 50
"""
Build encoder
"""
# build encoder model
encoder_input = Input(shape=input_shape, name='encoder_input')
x = LSTM(first_dim,return_sequences=True)(encoder_input)
x = LSTM(intermediate_dim)(x)
z_mean = Dense(latent_dim, name='z_mean')(x)
z_log_var = Dense(latent_dim, name='z_log_var')(x)
# use reparameterization trick to push the sampling out as input
z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean,z_log_var])
z = RepeatVector(timesteps)(z)
encoder = Model(encoder_input, [z_mean, z_log_var, z], name="encoder")
encoder.summary()
"""
Build decoder
"""
# build decoder model
latent_inputs = Input(shape = decoder_input_shape, name='z_sampling')
x = LSTM(intermediate_dim, return_sequences=True)(latent_inputs)
x = LSTM(first_dim,return_sequences=True)(x)
decoder_output = TimeDistributed(Dense(n_features, activation='sigmoid'))(x)
# decoder_output = Dense(n_features, activation='sigmoid')(x)
# decoder_output = LSTM(n_features,activation = 'sigmoid')(x)
# instantiate decoder model
decoder = Model(latent_inputs, decoder_output, name='decoder')
decoder.summary()
# instantiate vae model
decoder_output = decoder(encoder(encoder_input)[2])
vae = Model(encoder_input, decoder_output)
# vae.summary()
# VAE loss = mse_loss or xent_loss + kl_loss
# reconstruction_loss = mse(encoder_input, decoder_output)
reconstruction_loss = binary_crossentropy(encoder_input,decoder_output)
# reconstruction_loss *= original_dim
# kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
# kl_loss = K.sum(kl_loss, axis=-1)
# kl_loss *= -0.5
kl_loss = - 0.5 * K.mean(1 + z_log_var - K.square(z_mean) - K.exp(z_log_var), axis=-1)
vae_loss = K.mean(reconstruction_loss + kl_loss)
vae.add_loss(vae_loss)
vae.compile(optimizer='adam')
vae.summary()
vae.fit(x_train, epochs=300, batch_size=12)