mobilenetv2 with batch norm

d0cbb1f2 · nz11 · c5b85da7 · d0cbb1f2
Commit d0cbb1f2 authored 5 years ago by nz11
--- a/llvm/projects/keras/src/mobilenetv2_cifar10.py
+++ b/llvm/projects/keras/src/mobilenetv2_cifar10.py
+import sys
+import os
+os.environ['CUDA_VISIBLE_DEVICES'] = '1'
+
+import numpy as np
+
+from keras.models import Sequential
+from keras.layers import *
+from keras.datasets import cifar10
+from keras.utils import to_categorical
+from keras.callbacks import *
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Model
+from keras import optimizers
+from keras import regularizers
+import keras.backend as K
+
+
+# All tensors in this script use the channels-first (N, C, H, W) layout.
+K.set_image_data_format('channels_first')
+
+# Load CIFAR-10 and keep the raw integer test labels around before the
+# targets are one-hot encoded below.
+(X_train, y_train), (X_test, y_test) = cifar10.load_data()
+test_labels = y_test
+
+print("X_train.shape = ", X_train.shape)
+print("X_test.shape = ", X_test.shape)
+
+
+X_train = X_train.astype('float32')
+X_test = X_test.astype('float32')
+
+# Standardise each colour channel using statistics of the training set only.
+# The statistics are reduced over every axis except the channel axis, which
+# is axis 1 for 'channels_first' and the last axis for 'channels_last'.
+# (Reducing over axes (0, 1, 2) is only correct for channels-last data.)
+_channel_axis = 1 if K.image_data_format() == 'channels_first' else X_train.ndim - 1
+_stat_axes = tuple(axis for axis in range(X_train.ndim) if axis != _channel_axis)
+mean = np.mean(X_train, axis=_stat_axes, keepdims=True)
+std = np.std(X_train, axis=_stat_axes, keepdims=True)
+# The small epsilon guards against division by zero.
+X_train = (X_train - mean) / (std + 1e-9)
+X_test = (X_test - mean) / (std + 1e-9)
+
+# One-hot encode the 10 class labels for categorical cross-entropy.
+y_train = to_categorical(y_train, num_classes=10)
+y_test = to_categorical(y_test, num_classes=10)
+
+
+def _make_divisible(v, divisor, min_value=None):
+    """Round `v` to a multiple of `divisor`, never going below `min_value`.
+
+    `min_value` defaults to `divisor`. If the rounded value ends up more than
+    10% smaller than `v`, one extra `divisor` is added to compensate.
+    """
+    lower_bound = divisor if min_value is None else min_value
+    rounded = (int(v + divisor / 2) // divisor) * divisor
+    result = rounded if rounded > lower_bound else lower_bound
+    # Rounding down must not shrink the value by more than 10%.
+    return result + divisor if result < 0.9 * v else result
+
+
+# Define the computation of one inverted residual block
+# (expand -> depthwise -> project).
+def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id):
+    """Apply one MobileNetV2 inverted residual block to `inputs`.
+
+    Args:
+        inputs: Keras tensor fed into the block.
+        expansion: Multiplier applied to the input channel count to get the
+            width of the 1x1 expansion convolution.
+        stride: Stride of the 3x3 depthwise convolution.
+        alpha: Width multiplier applied to `filters`.
+        filters: Base number of output channels; it is scaled by `alpha` and
+            rounded to a multiple of 8.
+        block_id: Index of the block, used in layer names. A falsy id (0)
+            skips the expansion stage and uses the 'expanded_conv_' prefix.
+
+    Returns:
+        The projected tensor. When `stride` is 1 and the input and output
+        channel counts match, `inputs` is added to it as a residual.
+    """
+    prefix = 'block_{}_'.format(block_id)
+
+    # The position of the channel axis depends on the configured image data
+    # format. With 'channels_first' the last shape entry is the width, so the
+    # channel count must be read from axis 1, and batch normalisation must be
+    # told to normalise that same axis.
+    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
+    in_channels = K.int_shape(inputs)[channel_axis]
+    pointwise_conv_filters = int(filters * alpha)
+    pointwise_filters = _make_divisible(pointwise_conv_filters, 8)
+    x = inputs
+
+    # Expand: 1x1 convolution widening the tensor to expansion * in_channels.
+    if block_id:
+        x = Conv2D(expansion * in_channels, kernel_size=1, strides=1,
+                   padding='same', use_bias=False, activation=None,
+                   kernel_initializer="he_normal",
+                   kernel_regularizer=regularizers.l2(4e-5),
+                   name=prefix + 'expand')(x)
+        x = BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999,
+                               name=prefix + 'expand_BN')(x)
+        x = ReLU(6., name=prefix + 'expand_relu')(x)
+    else:
+        prefix = 'expanded_conv_'
+
+    # Depthwise: 3x3 per-channel convolution carrying the block's stride.
+    x = DepthwiseConv2D(kernel_size=3, strides=stride, activation=None,
+                        use_bias=False, padding='same',
+                        kernel_initializer="he_normal",
+                        depthwise_regularizer=regularizers.l2(4e-5),
+                        name=prefix + 'depthwise')(x)
+    x = BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999,
+                           name=prefix + 'depthwise_BN')(x)
+    x = ReLU(6., name=prefix + 'depthwise_relu')(x)
+
+    # Project: linear 1x1 convolution down to the output width (no ReLU).
+    x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='same',
+               use_bias=False, activation=None,
+               kernel_initializer="he_normal",
+               kernel_regularizer=regularizers.l2(4e-5),
+               name=prefix + 'project')(x)
+    x = BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999,
+                           name=prefix + 'project_BN')(x)
+
+    # The residual connection is only valid when the shapes line up.
+    if in_channels == pointwise_filters and stride == 1:
+        return Add(name=prefix + 'add')([inputs, x])
+    return x
+
+# Build the MobileNetV2 model.
+def get_mobilenetv2(alpha=1.0, depth_multiplier=1, input_shape=None):
+    """Build a MobileNetV2-style classifier with a 10-way softmax output.
+
+    Args:
+        alpha: Width multiplier applied to the filter counts of the first
+            convolution and of every inverted residual block. The last 1x1
+            convolution is only widened when `alpha` is greater than 1.0.
+        depth_multiplier: Accepted for interface compatibility; it is not
+            used by this implementation.
+        input_shape: Shape of one input image without the batch axis. When
+            None, a 32x32 RGB shape matching the configured image data
+            format is used.
+
+    Returns:
+        An uncompiled Keras `Model`.
+    """
+    channels_first = K.image_data_format() == 'channels_first'
+    # Batch normalisation has to normalise the channel axis, which is not the
+    # last axis for channels-first tensors.
+    channel_axis = 1 if channels_first else -1
+    if input_shape is None:
+        input_shape = (3, 32, 32) if channels_first else (32, 32, 3)
+
+    # Filter size (first block).
+    first_block_filters = _make_divisible(32 * alpha, 8)
+    # Input tensor (first block).
+    img_input = Input(shape=input_shape)
+
+    # Model architecture.
+    # NOTE: Conv1 is not followed by batch norm / ReLU6; those two layers
+    # were disabled (commented out) in the original definition.
+    x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='same',
+               use_bias=False, kernel_initializer="he_normal",
+               kernel_regularizer=regularizers.l2(4e-5),
+               name='Conv1')(img_input)
+
+    # One row per inverted residual block, in order; the row index is the
+    # block id: (filters, stride, expansion, dropout_after).
+    block_specs = [
+        (16, 1, 1, False),
+        (24, 1, 6, False),
+        (24, 1, 6, False),
+        (32, 2, 6, False),
+        (32, 1, 6, False),
+        (32, 1, 6, False),
+        (64, 2, 6, False),
+        (64, 1, 6, False),
+        (64, 1, 6, False),
+        (64, 1, 6, True),
+        (96, 1, 6, False),
+        (96, 1, 6, False),
+        (96, 1, 6, True),
+        (160, 2, 6, False),
+        (160, 1, 6, False),
+        (160, 1, 6, True),
+        (320, 1, 6, True),
+    ]
+    for block_id, (filters, stride, expansion, dropout_after) in enumerate(block_specs):
+        x = _inverted_res_block(x, filters=filters, alpha=alpha, stride=stride,
+                                expansion=expansion, block_id=block_id)
+        if dropout_after:
+            x = Dropout(rate=0.25)(x)
+
+    # Filter size (last block).
+    if alpha > 1.0:
+        last_block_filters = _make_divisible(1280 * alpha, 8)
+    else:
+        last_block_filters = 1280
+
+    x = Conv2D(last_block_filters, kernel_size=1, use_bias=False,
+               kernel_initializer="he_normal",
+               kernel_regularizer=regularizers.l2(4e-5),
+               name='Conv_1')(x)
+    x = BatchNormalization(axis=channel_axis, epsilon=1e-3, momentum=0.999,
+                           name='Conv_1_bn')(x)
+    x = ReLU(6., name='out_relu')(x)
+
+    # Classification head.
+    x = AveragePooling2D(pool_size=2)(x)
+    x = Flatten()(x)
+    x = Dense(10, activation='softmax')(x)
+
+    model = Model(inputs=img_input, outputs=x)
+    return model
+
+    
+# Data augmentation: random horizontal flips only. Every other transform
+# and all feature-wise normalisation are explicitly switched off.
+augmentation_options = dict(
+    horizontal_flip=True,
+    vertical_flip=False,
+    rotation_range=0.0,
+    width_shift_range=0.0,
+    height_shift_range=0.0,
+    featurewise_center=False,
+    featurewise_std_normalization=False,
+)
+datagen = ImageDataGenerator(**augmentation_options)
+datagen.fit(X_train)
+
+
+model = get_mobilenetv2()
+
+# Piecewise-constant learning-rate schedule, given as
+# (epoch at which the rate stops applying, learning rate).
+lr_schedule = [
+    (5, 2e-2),
+    (50, 1e-2),
+    (100, 8e-3),
+    (150, 4e-3),
+    (200, 2e-3),
+    (300, 1e-3),
+]
+# Expand the schedule into one learning rate per epoch.
+learning_rates = []
+for end_epoch, rate in lr_schedule:
+    learning_rates.extend([rate] * (end_epoch - len(learning_rates)))
+
+callbacks = [
+    LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))
+]
+
+batch_size = 128
+
+model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False),
+              loss='categorical_crossentropy',
+              metrics=['accuracy'])
+
+# Steps per epoch and the epoch count are derived from the data and the
+# schedule so they cannot drift apart; the float() keeps the division exact
+# even where `/` on two integers would truncate.
+model.fit_generator(
+    datagen.flow(X_train, y_train, batch_size=batch_size),
+    steps_per_epoch=int(np.ceil(len(X_train) / float(batch_size))),
+    validation_data=(X_test, y_test),
+    epochs=len(learning_rates),
+    callbacks=callbacks
+)
+