""" #Trains a ResNet on the CIFAR10 dataset. ResNet v1: [Deep Residual Learning for Image Recognition ](https://arxiv.org/pdf/1512.03385.pdf) ResNet v2: [Identity Mappings in Deep Residual Networks ](https://arxiv.org/pdf/1603.05027.pdf) Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti :------------|--:|-------:|-----------------------:|---: ResNet20 v1| 3| 92.16 %| 91.25 %|35 ResNet32 v1| 5| 92.46 %| 92.49 %|50 ResNet44 v1| 7| 92.50 %| 92.83 %|70 ResNet56 v1| 9| 92.71 %| 93.03 %|90 ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 ResNet164 v1| 27| - %| 94.07 %| - ResNet1001 v1|N/A| - %| 92.39 %| - Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti :------------|--:|-------:|-----------------------:|---: ResNet20 v2| 2| - %| - %|--- ResNet32 v2|N/A| NA %| NA %| NA ResNet44 v2|N/A| NA %| NA %| NA ResNet56 v2| 6| 93.01 %| NA %|100 ResNet110 v2| 12| 93.15 %| 93.63 %|180 ResNet164 v2| 18| - %| 94.54 %| - ResNet1001 v2|111| - %| 95.08+-.14 %| - """ import os import sys import glob import numpy as np import tensorflow as tf import scipy import scipy.io import keras from keras.models import Model, Sequential from keras.layers import * from keras.optimizers import Adam from keras import regularizers from keras import backend as K from keras.utils import to_categorical from keras.preprocessing.image import ImageDataGenerator from keras.callbacks import LearningRateScheduler from keras.datasets import cifar10 from Benchmark import Benchmark from Config import MODEL_PARAMS_DIR # Training parameters batch_size = 32 # orig paper trained all networks with batch_size=128 epochs = 200 # Model parameter # ---------------------------------------------------------------------------- # | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch # Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti # |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) # ---------------------------------------------------------------------------- # ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) # ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) # ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) # ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) # ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) # ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) # ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) # --------------------------------------------------------------------------- n = 3 # Model version # Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) version = 1 # Computed depth from supplied model parameter n if version == 1: depth = n * 6 + 2 elif version == 2: depth = n * 9 + 2 # Model name, depth and version model_type = 'ResNet%dv%d' % (depth, version) def resnet_layer(inputs, num_filters=16, kernel_size=3, strides=1, activation='relu', batch_normalization=True, conv_first=True): """2D Convolution-Batch Normalization-Activation stack builder # Arguments inputs (tensor): input tensor from input image or previous layer num_filters (int): Conv2D number of filters kernel_size (int): Conv2D square kernel dimensions strides (int): Conv2D square stride dimensions activation (string): activation name batch_normalization (bool): whether to include batch normalization conv_first (bool): conv-bn-activation (True) or bn-activation-conv (False) # Returns x (tensor): tensor as input to the next layer """ conv = Conv2D(num_filters, kernel_size=kernel_size, strides=strides, padding='valid', # NOTE: using valid convs with explicit pad operation kernel_initializer='he_normal', kernel_regularizer=regularizers.l2(1e-4)) padding_value = int((kernel_size - 1) / 2) zero_padding = ZeroPadding2D(padding = (padding_value, padding_value)) # FIXME: Temporarily disabled batch normalization batch_normalization = False x = inputs x = zero_padding(x) if conv_first: x = conv(x) if batch_normalization: x = BatchNormalization()(x) if activation is not None: x = Activation(activation)(x) else: if batch_normalization: x = BatchNormalization()(x) if activation is not None: x = Activation(activation)(x) x = conv(x) return x class ResNet18_CIFAR10(Benchmark): def lr_schedule(self, epoch): """Learning Rate Schedule Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. Called automatically every epoch as part of callbacks during training. # Arguments epoch (int): The number of epochs # Returns lr (float32): learning rate """ lr = 1e-3 if epoch > 180: lr *= 0.5e-3 elif epoch > 160: lr *= 1e-3 elif epoch > 120: lr *= 1e-2 elif epoch > 80: lr *= 1e-1 return lr def resnet_v0(self, input_shape, depth, num_classes=10): """ResNet Version 1 Model builder [a] Stacks of 2 x (3 x 3) Conv2D-BN-ReLU Last ReLU is after the shortcut connection. At the beginning of each stage, the feature map size is halved (downsampled) by a convolutional layer with strides=2, while the number of filters is doubled. Within each stage, the layers have the same number filters and the same number of filters. Features maps sizes: stage 0: 32x32, 16 stage 1: 16x16, 32 stage 2: 8x8, 64 The Number of parameters is approx the same as Table 6 of [a]: ResNet20 0.27M ResNet32 0.46M ResNet44 0.66M ResNet56 0.85M ResNet110 1.7M # Arguments input_shape (tensor): shape of input image tensor depth (int): number of core convolutional layers num_classes (int): number of classes (CIFAR10 has 10) # Returns model (Model): Keras model instance """ if (depth - 2) % 6 != 0: raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') # Start model definition. num_filters = 16 num_res_blocks = int((depth - 2) / 6) inputs = Input(shape=input_shape) x = resnet_layer(inputs=inputs) # Instantiate the stack of residual units for stack in range(3): for res_block in range(num_res_blocks): strides = 1 if stack > 0 and res_block == 0: # first layer but not first stack strides = 2 # downsample y = resnet_layer(inputs=x, num_filters=num_filters, strides=strides) y = resnet_layer(inputs=y, num_filters=num_filters, activation=None) if stack > 0 and res_block == 0: # first layer but not first stack # linear projection residual shortcut connection to match # changed dims x = resnet_layer(inputs=x, num_filters=num_filters, kernel_size=1, strides=strides, activation=None, batch_normalization=False) x = keras.layers.add([x, y]) x = Activation('relu')(x) num_filters *= 1 # Add classifier on top. # v1 does not use BN after last shortcut connection-ReLU #-- x = AveragePooling2D(pool_size=8)(x) y = Flatten()(x) x = Dense(64)(y) outputs = Dense(num_classes, activation='softmax', kernel_initializer='he_normal')(x) # Instantiate model. model = Model(inputs=inputs, outputs=outputs) return model def resnet_v1_1(self, input_shape, depth, num_classes=10): """ResNet Version 1 Model builder [a] Stacks of 2 x (3 x 3) Conv2D-BN-ReLU Last ReLU is after the shortcut connection. At the beginning of each stage, the feature map size is halved (downsampled) by a convolutional layer with strides=2, while the number of filters is doubled. Within each stage, the layers have the same number filters and the same number of filters. Features maps sizes: stage 0: 32x32, 16 stage 1: 16x16, 32 stage 2: 8x8, 64 The Number of parameters is approx the same as Table 6 of [a]: ResNet20 0.27M ResNet32 0.46M ResNet44 0.66M ResNet56 0.85M ResNet110 1.7M # Arguments input_shape (tensor): shape of input image tensor depth (int): number of core convolutional layers num_classes (int): number of classes (CIFAR10 has 10) # Returns model (Model): Keras model instance """ if (depth - 2) % 6 != 0: raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') # Start model definition. num_filters = 16 num_res_blocks = int((depth - 2) / 6) inputs = Input(shape=input_shape) x = resnet_layer(inputs=inputs) # Instantiate the stack of residual units for stack in range(3): for res_block in range(num_res_blocks): strides = 1 if stack > 0 and res_block == 0: # first layer but not first stack strides = 2 # downsample y = resnet_layer(inputs=x, num_filters=num_filters, strides=strides) y = resnet_layer(inputs=y, num_filters=num_filters, activation=None) if stack > 0 and res_block == 0: # first layer but not first stack # linear projection residual shortcut connection to match # changed dims x = resnet_layer(inputs=x, num_filters=num_filters, kernel_size=1, strides=strides, activation=None, batch_normalization=False) x = keras.layers.add([x, y]) x = Activation('relu')(x) num_filters *= 2 x = AveragePooling2D(pool_size=8)(x) y = Flatten()(x) outputs = Dense(num_classes, #activation='softmax', kernel_initializer='he_normal')(y) outputs = Activation('softmax')(outputs) # Instantiate model. model = Model(inputs=inputs, outputs=outputs) return model def resnet_v2(self, input_shape, depth, num_classes=10): """ResNet Version 2 Model builder [b] Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as bottleneck layer First shortcut connection per layer is 1 x 1 Conv2D. Second and onwards shortcut connection is identity. At the beginning of each stage, the feature map size is halved (downsampled) by a convolutional layer with strides=2, while the number of filter maps is doubled. Within each stage, the layers have the same number filters and the same filter map sizes. Features maps sizes: conv1 : 32x32, 16 stage 0: 32x32, 64 stage 1: 16x16, 128 stage 2: 8x8, 256 # Arguments input_shape (tensor): shape of input image tensor depth (int): number of core convolutional layers num_classes (int): number of classes (CIFAR10 has 10) # Returns model (Model): Keras model instance """ if (depth - 2) % 9 != 0: raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') # Start model definition. num_filters_in = 16 num_res_blocks = int((depth - 2) / 9) inputs = Input(shape=input_shape) # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths x = resnet_layer(inputs=inputs, num_filters=num_filters_in, conv_first=True) # Instantiate the stack of residual units for stage in range(3): for res_block in range(num_res_blocks): activation = 'relu' batch_normalization = True strides = 1 if stage == 0: num_filters_out = num_filters_in * 4 if res_block == 0: # first layer and first stage activation = None batch_normalization = False else: num_filters_out = num_filters_in * 2 if res_block == 0: # first layer but not first stage strides = 2 # downsample # bottleneck residual unit y = resnet_layer(inputs=x, num_filters=num_filters_in, kernel_size=1, strides=strides, activation=activation, batch_normalization=batch_normalization, conv_first=False) y = resnet_layer(inputs=y, num_filters=num_filters_in, conv_first=False) y = resnet_layer(inputs=y, num_filters=num_filters_out, kernel_size=1, conv_first=False) if res_block == 0: # linear projection residual shortcut connection to match # changed dims x = resnet_layer(inputs=x, num_filters=num_filters_out, kernel_size=1, strides=strides, activation=None, batch_normalization=False) x = keras.layers.add([x, y]) num_filters_in = num_filters_out # Add classifier on top. # v2 has BN-ReLU before Pooling x = BatchNormalization()(x) x = Activation('relu')(x) x = AveragePooling2D(pool_size=8)(x) y = Flatten()(x) outputs = Dense(num_classes, activation='softmax', kernel_initializer='he_normal')(y) # Instantiate model. model = Model(inputs=inputs, outputs=outputs) return model def buildModel(self): depth = 20 input_shape = (3, 32, 32) if version == 2: model = self.resnet_v2(input_shape=input_shape, depth=depth) else: model = self.resnet_v1_1(input_shape=input_shape, depth=depth) return model def data_preprocess(self): (X_train, y_train), (X_val, y_val) = cifar10.load_data() X_train = X_train / 255.0 mean = np.mean(X_train) std = np.std(X_train) X_train = (X_train - mean) X_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_input.bin', dtype=np.float32) y_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_labels.bin', dtype=np.uint32) X_test = X_test.reshape((-1,3,32,32)) X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_input.bin', dtype=np.float32) y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_labels.bin', dtype=np.uint32) X_tuner = X_tuner.reshape((-1,3,32,32)) return X_train, y_train, X_test, y_test, X_tuner, y_tuner def trainModel(self, model, X_train, y_train, X_test, y_test): y_train = to_categorical(y_train, self.num_classes) y_test = to_categorical(y_test, self.num_classes) model.compile( loss='categorical_crossentropy', optimizer=Adam(lr=self.lr_schedule(0)), metrics=['accuracy'] ) lr_scheduler = LearningRateScheduler(self.lr_schedule) lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, patience=5, min_lr=0.5e-6) callbacks = [lr_reducer, lr_scheduler] # Run training, with or without data augmentation. if not data_augmentation: print('Not using data augmentation.') model.fit(X_train, y_train, batch_size=batch_size, epochs=epochs, validation_data=(X_test, y_test), shuffle=True, callbacks=callbacks) else: print('Using real-time data augmentation.') # This will do preprocessing and realtime data augmentation: datagen = ImageDataGenerator( # set input mean to 0 over the dataset featurewise_center=False, # set each sample mean to 0 samplewise_center=False, # divide inputs by std of dataset featurewise_std_normalization=False, # divide each input by its std samplewise_std_normalization=False, # apply ZCA whitening zca_whitening=False, # epsilon for ZCA whitening zca_epsilon=1e-06, # randomly rotate images in the range (deg 0 to 180) rotation_range=0, # randomly shift images horizontally width_shift_range=0.1, # randomly shift images vertically height_shift_range=0.1, # set range for random shear shear_range=0., # set range for random zoom zoom_range=0., # set range for random channel shifts channel_shift_range=0., # set mode for filling points outside the input boundaries fill_mode='nearest', # value used for fill_mode = "constant" cval=0., # randomly flip images horizontal_flip=True, # randomly flip images vertical_flip=False, # set rescaling factor (applied before any other transformation) rescale=None, # set function that will be applied on each input preprocessing_function=None, # image data format, either "channels_first" or "channels_last" data_format="channels_first", # fraction of images reserved for validation (strictly between 0 and 1) validation_split=0.0) # Compute quantities required for featurewise normalization # (std, mean, and principal components if ZCA whitening is applied). datagen.fit(X_train) # Fit the model on the batches generated by datagen.flow(). model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size), validation_data=(X_test, y_test), epochs=epochs, verbose=1, workers=4, callbacks=callbacks) return model if __name__ == '__main__': os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Changing to NCHW format K.set_image_data_format('channels_first') ### Parameters specific to each benchmark reload_dir = MODEL_PARAMS_DIR + '/resnet18_cifar10/' keras_model_file = MODEL_PARAMS_DIR + '/keras/resnet18_cifar10.h5' data_dir = 'data/resnet18_cifar10/' src_dir = 'src/resnet18_cifar10_src/' num_classes = 10 batch_size = 500 model = ResNet18_CIFAR10('ResNet18_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) model.exportToHPVM(sys.argv)