python - Model stops improving: validation accuracy plateaus at 46-49%, seemingly overfitting

I am working on this dataset:

https://www.kaggle.com/dionyshsmiaris/xrays, which includes X-rays of three classes:

  1. 0="normal"
  2. 1="pneumonia_cause_viral_infection"
  3. 2="pneumonia_cause_bacteria"

I am using a custom ResNet of depth 50 (although I tried depths 20, 34, and 101 with the same results).
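A plateau at 46-49% with three classes can simply match the share of the majority class, so the label distribution is worth checking first (a quick sketch; the class_id column name and CSV path are taken from the loading code below):

import pandas as pd

# Print the fraction of each class; a heavily imbalanced dataset would explain
# a flat accuracy near the majority-class frequency.
labels = pd.read_csv('/content/gdrive/MyDrive/Xraydataset/labels_train.csv')
print(labels['class_id'].value_counts(normalize=True))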

This is my data-loading code:

import os
import numpy as np
import pandas as pd
import tensorflow as tf
from tqdm import tqdm

train_dir = "/content/gdrive/MyDrive/Xraydataset/train_images/"
test_dir = "/content/gdrive/MyDrive/Xraydataset/test_images/"

def get_data(folder):
    X = []
    y = []
    for image_filename in tqdm(os.listdir(folder)):
        img_file = tf.keras.preprocessing.image.load_img(
            os.path.join(folder, image_filename), color_mode="grayscale")
        if img_file is not None:
            img_file = img_file.resize((224, 224), 1)  # resample=1 is LANCZOS
            img_arr = np.asarray(img_file)
            X.append(img_arr)
    # NOTE: labels are always read from labels_train.csv, even for test_dir.
    label = pd.read_csv('/content/gdrive/MyDrive/Xraydataset/labels_train.csv',
                        usecols=['class_id'])
    X = np.asarray(X)
    y = np.asarray(label)
    return X, y

X_train, y_train = get_data(train_dir)
X_test, y_test = get_data(test_dir)
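One thing to note: os.listdir returns filenames in arbitrary order, while the CSV is read in row order, so the images in X and the labels in y may not be aligned. A minimal alignment sketch, assuming labels_train.csv also contains a filename column (the column name file_name below is hypothetical; check the actual CSV header):

# Hypothetical sketch: pair each image with its label by filename rather
# than relying on directory order. 'file_name' is an assumed column name.
labels = pd.read_csv('/content/gdrive/MyDrive/Xraydataset/labels_train.csv')
label_map = dict(zip(labels['file_name'], labels['class_id']))
filenames = sorted(os.listdir(train_dir))
y = np.asarray([label_map[f] for f in filenames])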

Preprocessing, normalization, to_categorical:

X_train = np.expand_dims(X_train, axis=3)
X_test = np.expand_dims(X_test, axis=3)
X_train = X_train.astype('float32') / 255
X_test = X_test.astype('float32') / 255
# Subtract the per-pixel training mean from both sets.
X_train_mean = np.mean(X_train, axis=0)
X_train -= X_train_mean
X_test -= X_train_mean
print('x_train shape:', X_train.shape)
print(X_train.shape[0], 'train samples')
print(X_test.shape[0], 'test samples')
# Split the training data into train and validation parts.
xtrain = X_train[:3700]
ytrain = y_train[:3700]
xtest = X_train[3700:]
ytest = y_train[3700:]
t_train = keras.utils.to_categorical(ytrain, 3)
t_test = keras.utils.to_categorical(ytest, 3)
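The slice at index 3700 assumes the samples arrive shuffled; if the files happen to be grouped by class, the held-out tail will be skewed. A hedged alternative using scikit-learn's stratified split:

from sklearn.model_selection import train_test_split

# Stratify so each class keeps the same proportion in both partitions.
xtrain, xtest, ytrain, ytest = train_test_split(
    X_train, y_train, test_size=0.15, stratify=y_train, random_state=42)
t_train = keras.utils.to_categorical(ytrain, 3)
t_test = keras.utils.to_categorical(ytest, 3)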

ResNet layer:

def resnet_layer(inputs,
                 num_filters=16,
                 kernel_size=3,
                 strides=1,
                 activation='relu',
                 batch_normalization=True,
                 conv_first=True):
    # 3x3 convolution with He initialization and light L2 regularization.
    conv = Conv2D(num_filters,
                  kernel_size=kernel_size,
                  strides=strides,
                  padding='same',
                  kernel_initializer='he_normal',
                  kernel_regularizer=l2(1e-4))

    x = inputs
    if conv_first:
        x = conv(x)
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
    else:
        if batch_normalization:
            x = BatchNormalization()(x)
        if activation is not None:
            x = Activation(activation)(x)
        x = conv(x)
    return x
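As a quick sanity check (a sketch, not part of the original training code), the layer can be probed with a dummy grayscale input; the defaults should produce a (None, 224, 224, 16) tensor:

from tensorflow.keras.layers import Input

probe = Input(shape=(224, 224, 1))
out = resnet_layer(inputs=probe)
print(out.shape)  # expected: (None, 224, 224, 16)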

ResNet v1:

def resnet_v1(input_shape, depth, num_classes=3):
  
    if (depth - 2) % 6 != 0:
        raise ValueError('depth should be 6n + 2 (e.g. 20, 32, 44)')
    # Start model definition.
    num_filters = 16
    num_res_blocks = int((depth - 2) / 6)

    inputs = Input(shape=input_shape)
    x = resnet_layer(inputs=inputs)
    # Instantiate the stack of residual units
    for stack in range(3):
        for res_block in range(num_res_blocks):
            strides = 1
            if stack > 0 and res_block == 0:  # first layer but not first stack
                strides = 2  # downsample
            y = resnet_layer(inputs=x,
                             num_filters=num_filters,
                             strides=strides)
            y = resnet_layer(inputs=y,
                             num_filters=num_filters,
                             activation=None)
            if stack > 0 and res_block == 0:  # first layer but not first stack
                # linear projection residual shortcut connection to match
                # changed dims
                x = resnet_layer(inputs=x,
                                 num_filters=num_filters,
                                 kernel_size=2,  # changed from the standard 1x1 projection
                                 strides=strides,
                                 activation=None,
                                 batch_normalization=False)
            x = keras.layers.add([x, y])
            x = Activation('relu')(x)
        num_filters *= 2

    # Add classifier on top.
    # v1 does not use BN after the last shortcut connection / ReLU.

    x = AveragePooling2D(pool_size=8)(x)
    y = Flatten()(x)
    outputs = Dense(num_classes,
                    activation='softmax',
                    kernel_initializer='he_normal')(y)

    # Instantiate model.
    model = Model(inputs=inputs, outputs=outputs)
    print('Model parameters: {:d}'.format(model.count_params()))
    return model
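A quick shape check (again a sketch, separate from the training code below): depth 50 satisfies (depth - 2) % 6 == 0, and the classifier should emit three softmax probabilities:

probe_model = resnet_v1(input_shape=(224, 224, 1), depth=50)
print(probe_model.output_shape)  # expected: (None, 3)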

Learning rate schedule:

def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    return lr
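Printing a few boundary epochs confirms the step decays (the thresholds are exclusive, so the drops happen at epochs 81, 121, 161, and 181):

for e in (0, 80, 81, 120, 121, 160, 161, 180, 181):
    print(e, lr_schedule(e))
# expected: 1e-3 up to epoch 80, then 1e-4, 1e-5, 1e-6, and finally 5e-7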

Compile:

depth = 50
input_shape = X_train.shape[1:]

model = resnet_v1(input_shape=input_shape, depth=depth)

model.compile(loss="categorical_crossentropy",
              optimizer=Adam(learning_rate=lr_schedule(0)),
              metrics=['acc'])

Datagen, batch size, model saving, ReduceLROnPlateau:

batch_size = 16  # the original paper trained with batch_size=128, but 128 crashes (out of memory) for me
epochs = 200

# Prepare model saving directory.
save_dir = os.path.join(os.getcwd(), 'saved_models')  # assumed; save_dir was not defined in the snippet
model_name = 'resnet50F1-e{epoch:04d}-loss{loss:.3f}-acc{acc:.3f}-valloss{val_loss:.3f}-valacc{val_acc:.3f}.h5'
if not os.path.isdir(save_dir):
    os.makedirs(save_dir)
filepath = os.path.join(save_dir, model_name)

# Prepare callbacks for model saving and for learning rate adjustment.
checkpoint = ModelCheckpoint(filepath=filepath,
                             monitor='val_acc',
                             verbose=1,
                             save_best_only=True)

lr_scheduler = LearningRateScheduler(lr_schedule)

lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
                               cooldown=0,
                               patience=5,
                               min_lr=0.5e-6)
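One interaction worth noting (an observation, not from the original post): LearningRateScheduler re-applies lr_schedule(epoch) at the start of every epoch, so any reduction that ReduceLROnPlateau makes at the end of an epoch is overwritten immediately. A sketch of relying on a single mechanism instead:

# Option A: scheduled decay only
callbacks = [lr_scheduler, checkpoint]
# Option B: plateau-driven decay only
# callbacks = [lr_reducer, checkpoint]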

# This will do preprocessing and realtime data augmentation:
datagen = ImageDataGenerator(
    # set input mean to 0 over the dataset
    featurewise_center=True,
    # set each sample mean to 0
    samplewise_center=True,
    # divide inputs by std of dataset
    featurewise_std_normalization=True,#true
    # divide each input by its std
    samplewise_std_normalization=True,
    # apply ZCA whitening
    zca_whitening=False, #true???
    # epsilon for ZCA whitening
    zca_epsilon=1e-06,#05?
    # randomly rotate images in the range (deg 0 to 180)
    rotation_range=0.,
    # randomly shift images horizontally
    width_shift_range=0.,#0.1
    # randomly shift images vertically
    height_shift_range=0.,#0.1
    # set range for random shear
    shear_range=0.,
    # set range for random zoom
    zoom_range=0., 
    # set range for random channel shifts
    channel_shift_range=0,
    # set mode for filling points outside the input boundaries
    fill_mode='nearest',
    # value used for fill_mode = "constant"
    cval=0.,
    # randomly flip images
    horizontal_flip=True,#True
    # randomly flip images
    vertical_flip=False,
    # set rescaling factor (applied before any other transformation)
    rescale=None,#none
    # set function that will be applied on each input
    preprocessing_function=None,
    # image data format, either "channels_first" or "channels_last"
    data_format=None,
    # fraction of images reserved for validation (strictly between 0 and 1)
    validation_split=0.0)

# Compute quantities required for featurewise normalization
# (std, mean, and principal components if ZCA whitening is applied).
datagen.fit(xtrain)
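Note that validation_data in the fit call below bypasses the generator, so xtest never receives the featurewise/samplewise standardization applied to the training batches; the model is then validated on differently scaled inputs, which alone can pin validation accuracy. A sketch applying the same preprocessing via ImageDataGenerator.standardize (which modifies its argument in place, hence the copy):

# Standardize each validation image exactly as the training batches are.
xtest_std = np.asarray([datagen.standardize(np.copy(img)) for img in xtest])

xtest_std could then be passed as validation_data=(xtest_std, t_test).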

Training:

history = model.fit(datagen.flow(xtrain, t_train, batch_size=batch_size),
                    validation_data=(xtest, t_test),
                    epochs=epochs, verbose=0, workers=4,
                    steps_per_epoch=int(xtrain.shape[0] / batch_size),
                    # MyCallback is a custom callback defined elsewhere in the notebook.
                    callbacks=[lr_reducer, lr_scheduler, MyCallback(), checkpoint])


# Score trained model.
scores = model.evaluate(xtest, t_test, verbose=1)
print('Test loss:', scores[0])
print('Test accuracy:', scores[1])
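To see whether the network has collapsed onto a single class, one common cause of a flat 46-49% accuracy, a hedged diagnostic:

from sklearn.metrics import confusion_matrix

# Rows are true classes, columns predicted; one heavy column means the
# model predicts the same class regardless of input.
y_pred = np.argmax(model.predict(xtest), axis=1)
y_true = np.argmax(t_test, axis=1)
print(confusion_matrix(y_true, y_pred))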


1 Answer

Waiting for an expert to reply.
