diff --git a/hpvm/projects/onnx/README.md b/hpvm/projects/onnx/README.md deleted file mode 100644 index eba07cd9d4102283362bbd021f28ff59b3755b0e..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/README.md +++ /dev/null @@ -1,13 +0,0 @@ -## Importing Conda Environment: - -conda env create -f onnx\_environment.yml - -## Activate/deactivate Conda Environment - -conda activate onnx\_frontend - -## Building and Installing Frontend for ONNX: - -python setup.py build - -python setup.py install diff --git a/hpvm/projects/onnx/models/alexnet/alexnet.onnx b/hpvm/projects/onnx/models/alexnet/alexnet.onnx deleted file mode 100644 index d87fa5fe75eddf8ae2823bd0ace120bbacd50cdf..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/alexnet.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_0.npz b/hpvm/projects/onnx/models/alexnet/test_data_0.npz deleted file mode 100644 index 0089f591dc76a98182e0384c70874fca701d3469..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_0.npz and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_1.npz b/hpvm/projects/onnx/models/alexnet/test_data_1.npz deleted file mode 100644 index 30b9d311981525cff5ef71ad588a763eb403c1b4..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_1.npz and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_2.npz b/hpvm/projects/onnx/models/alexnet/test_data_2.npz deleted file mode 100644 index f73145dc9154aacf13455de9a865889d54efe263..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_2.npz and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb deleted file mode 100644 index 0dbdb3ecde433c70d5201cce0343b396261ca9e0..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_0/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb deleted file mode 100644 index c9a100142c98c684ce9358966c6ae4b99e62521d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_0/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb deleted file mode 100644 index 968e6125f1ab45d6d3a1c2067309f05a2aab689a..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_1/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb deleted file mode 100644 index c841190b7499f8eded7a484e691254a5aafbc4db..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_1/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb deleted file mode 100644 index 189e616b49d2fa21fae9a32425e24cd263ca0203..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_2/input_0.pb and /dev/null differ diff --git 
a/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb deleted file mode 100644 index ec17185491fa73d344cb46415185422bc7f416bc..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_2/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb deleted file mode 100644 index 3d91313b14fb444c268f57e8bb05f8c38421107d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_3/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb deleted file mode 100644 index 49e4e4f83c8384886ceb08283a6b46d2430ac1b9..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_3/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb deleted file mode 100644 index 72d611c195d20032b4953562f4722b4e1ddedff8..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_4/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb deleted file mode 100644 index dfe5b65c8260d684171cc66e3759cb66e02a1ed7..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_4/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb deleted file mode 100644 index f737585597ce2b56253e37f75f3defbbfce30cec..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_5/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb b/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb deleted file mode 100644 index 6374baf7869b08c98c3b43aebbe58e3d15e99774..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/alexnet/test_data_set_5/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/alexnet.onnx b/hpvm/projects/onnx/models/keras/alexnet.onnx deleted file mode 100644 index 452d9f846fad21b5fd89321e86b8a7945c625d2d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/alexnet.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/alexnet2.onnx b/hpvm/projects/onnx/models/keras/alexnet2.onnx deleted file mode 100644 index e1147a8e589243004411b2b4f958a2240f3df94d..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/alexnet2.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/alexnet8k.onnx b/hpvm/projects/onnx/models/keras/alexnet8k.onnx deleted file mode 100644 index 49f74d5f2f4dfb74f266b174a5a72d5c7add0cc1..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/alexnet8k.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/alexnet_last.onnx b/hpvm/projects/onnx/models/keras/alexnet_last.onnx deleted file mode 100644 index 
f9f82e3cbfd79df383afe3c61540fdfded45a976..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/alexnet_last.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/lenet.onnx b/hpvm/projects/onnx/models/keras/lenet.onnx deleted file mode 100644 index 659764f2f46d05dad62c2187c13c665839fe3058..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/lenet.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/mobilenet.onnx b/hpvm/projects/onnx/models/keras/mobilenet.onnx deleted file mode 100644 index 947ab70daf9653af5f7d84349f92aca79d31c131..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/mobilenet.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx b/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx deleted file mode 100644 index 998d7f05b540b97e0fa36daab280aebbb3574eff..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/mobilenet_cifar10.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/resnet.onnx b/hpvm/projects/onnx/models/keras/resnet.onnx deleted file mode 100644 index b9ad6bc843e3167151e74ad1a720130ef8421b4e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/resnet.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx deleted file mode 100644 index 2395a9770aaf0df87d8e2acdad0935a8058fed3b..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/vgg16-cifar10.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx b/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx deleted file mode 100644 index a96ddf4afffca4528d27b4f4fd482aeddc8ae165..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/keras/vgg16_cifar10.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/mnist/mnist.onnx b/hpvm/projects/onnx/models/mnist/mnist.onnx deleted file mode 100644 index fc1a3f733c6e6243dd23dacb125b7a372de55a50..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/mnist/mnist.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb deleted file mode 100644 index f0072d51a480af615e92312608f75993be9f1136..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/mnist/test_data_set_0/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb deleted file mode 100644 index a6f4cdf92e27aaab21e44098b5b28e16048098e2..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/models/mnist/test_data_set_0/output_0.pb +++ /dev/null @@ -1,2 +0,0 @@ - -J(ãêsDU®ÄŒtÍEÚ'DWQeÄYôÐÄQôÄ3vÂNKBÄñ³Ä \ No newline at end of file diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb deleted file mode 100644 index b40ca9538b7b03111268892ca8d7c71a5e376d06..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/mnist/test_data_set_1/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb 
b/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb deleted file mode 100644 index 786b006694a575f64fa212cb60d60d962fc042b0..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/models/mnist/test_data_set_1/output_0.pb +++ /dev/null @@ -1,2 +0,0 @@ - -J(E_ÅÓ;ùÒÄXê“Äy›Ä„*_DHԺÓ!‘Ã9ZÞ \ No newline at end of file diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb deleted file mode 100644 index d84a2064893ba68ad633391af74744dfc991a7cd..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/mnist/test_data_set_2/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb deleted file mode 100644 index cb22e7b4ff7c12b9ec73ab832e34c3a566a51077..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/models/mnist/test_data_set_2/output_0.pb +++ /dev/null @@ -1,2 +0,0 @@ - -J(láÅ4‹Ä’†DMWÄ‘ÆDË¿Ä>á'Åìa¤&•B6hE \ No newline at end of file diff --git a/hpvm/projects/onnx/models/resnet50/resnet50.onnx b/hpvm/projects/onnx/models/resnet50/resnet50.onnx deleted file mode 100644 index 09992ccb764246e182874eda716d535c83a82123..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/resnet50.onnx and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb deleted file mode 100644 index 846b581f7683fe007535309dd7bba5e356e524f6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_0/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb deleted file mode 100644 index 5c357e69d8f002e509525cbf8bbb470fa6bc575e..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_0/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb deleted file mode 100644 index 0454c0cd3e58115a8805a1f770ea5406b5a72dbb..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_1/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb deleted file mode 100644 index 631cf2cad41b3d582b4d792ef480f8fcdec261df..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_1/output_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb deleted file mode 100644 index 665be39c8ee7136b9341f5ecdb370433d8afc910..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_2/input_0.pb and /dev/null differ diff --git a/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb b/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb deleted file mode 100644 index ac41dadf34359eb8ea9d185ad181138b32e09cec..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/models/resnet50/test_data_set_2/output_0.pb and /dev/null differ diff --git 
a/hpvm/projects/onnx/profile/alexnet_keras.py b/hpvm/projects/onnx/profile/alexnet_keras.py deleted file mode 100644 index bcbf35f0c632eb27d300a94fde5882761701c53b..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/profile/alexnet_keras.py +++ /dev/null @@ -1,123 +0,0 @@ -import numpy as np -from keras.datasets import cifar10 -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Flatten, Activation -from keras.layers.convolutional import Conv2D -from keras.optimizers import Adam -from keras.layers.pooling import MaxPooling2D -from keras.utils.np_utils import to_categorical -from keras.preprocessing.image import ImageDataGenerator -from keras import backend as K -from keras import regularizers -from keras.callbacks import LearningRateScheduler -import sys -import struct -import keras -import numpy as np -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -def buildModel2(): - - activation_type = "tanh" - weight_decay = 1e-4 - - model = Sequential() - model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, - input_shape=(3, 32, 32), padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.2)) - model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay))) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.3)) - - model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.4)) - - model.add(Flatten()) - #model.add(Flatten()) - #model.add(Dense(256)) - model.add(Dense(10)) - model.add(Activation('softmax')) - - return model - -def lr_schedule(epoch): - lrate = 0.001 - if epoch > 20: - lrate = 0.0005 - if epoch > 40: - lrate = 0.0003 - if epoch > 60: - lrate = 0.0001 - if epoch > 80: - lrate = 0.00005 - - return lrate - -def trainModel(model): - #dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" - - #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) - # Compile the model - model.compile(loss='categorical_crossentropy', - optimizer=Adam(lr=0.0001, decay=1e-6), - #optimizer = opt_rms, - metrics=['accuracy']) - - #print to_categorical(Y_train, 10) - print (to_categorical(Y_train)) - - - datagen = ImageDataGenerator( - rotation_range=15, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, - ) - datagen.fit(X_train) - - - model.fit(X_train, to_categorical(Y_train, 10), - batch_size=128, - shuffle=True, - epochs = 1, - #epochs=100, - validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)]) - - # Evaluate the model - scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10)) - - print('Loss: %.3f' % scores[0]) - print('Accuracy: %.3f' % scores[1]) - - print ("*** TRAINED MODEL ****\n") - -K.set_image_data_format('channels_first') -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() -test_labels = Y_test -train_labels = Y_train - -#X_train = 
X_train.astype('float32') -#X_test = X_test.astype('float32') -X_train = X_train / 255.0 -X_test = X_test / 255.0 - -mean = np.mean(X_train,axis=(0,1,2,3)) -std = np.std(X_train,axis=(0,1,2,3)) -X_train = (X_train-mean)/(std+1e-7) -X_test = (X_test-mean)/(std+1e-7) -model = buildModel2() -trainModel(model) -import time -start = time.time() -keras_result = model.predict(X_test[:8000]) -print("time:", time.time() - start) \ No newline at end of file diff --git a/hpvm/projects/onnx/profile/alexnet_onnx.py b/hpvm/projects/onnx/profile/alexnet_onnx.py deleted file mode 100644 index 1bde4eaab39d901ef131f7cda59810721d701e1b..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/profile/alexnet_onnx.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -from keras.datasets import cifar10 -from keras import backend as K -import numpy as np -import os -import keras2onnx -import onnx -import onnxruntime -import time -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -K.set_image_data_format('channels_first') -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() -test_labels = Y_test -train_labels = Y_train - -#X_train = X_train.astype('float32') -#X_test = X_test.astype('float32') -X_train = X_train / 255.0 -X_test = X_test / 255.0 - -mean = np.mean(X_train,axis=(0,1,2,3)) -std = np.std(X_train,axis=(0,1,2,3)) -X_train = (X_train-mean)/(std+1e-7) -X_test = (X_test-mean)/(std+1e-7) - -sess = onnxruntime.InferenceSession("../models/keras/alexnet.onnx") - -input_name = sess.get_inputs()[0].name -print("Input name :", input_name) -input_shape = sess.get_inputs()[0].shape -print("Input shape :", input_shape) -input_type = sess.get_inputs()[0].type -print("Input type :", input_type) - -output_name = sess.get_outputs()[0].name -print("Output name :", output_name) -output_shape = sess.get_outputs()[0].shape -print("Output shape :", output_shape) -output_type = sess.get_outputs()[0].type -print("Output type :", output_type) - -start_time = time.time() -ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]}) -print("time: ", time.time() - start_time) \ No newline at end of file diff --git a/hpvm/projects/onnx/profile/alexnet_tvm.py b/hpvm/projects/onnx/profile/alexnet_tvm.py deleted file mode 100644 index 5a3847335f43e0d9af9a5a51fd1ba90582f22489..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/profile/alexnet_tvm.py +++ /dev/null @@ -1,45 +0,0 @@ -import numpy as np -from keras.datasets import cifar10 -from keras import backend as K -import sys -import struct -import numpy as np -import os -import tvm -import tvm.relay as relay -from tvm.contrib import graph_runtime -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -K.set_image_data_format('channels_last') -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() -X_test = X_test / 255.0 -mean = np.mean(X_train,axis=(0,1,2,3)) -std = np.std(X_train,axis=(0,1,2,3)) -X_test = (X_test-mean)/(std+1e-7) - -import onnx -onnx_model = onnx.load("../models/keras/alexnet_last.onnx") - -input_name = 'conv2d_8_input' -input_size = 8000 -shape_dict = {input_name: X_test[:input_size].shape} -mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) -target = 'cuda -libs=cudnn,cublas' -with relay.build_config(opt_level=3): - graph, lib, params = relay.build(mod, target, params=params) - -import time -ctx = tvm.gpu() -#data = np.random.uniform(-1, 1, size=data_shape).astype("float32") -# create module -module = graph_runtime.create(graph, lib, ctx) -# set input and parameters -module.set_input("conv2d_8_input", 
X_test[:input_size].astype("float32")) -module.set_input(**params) -# run -start_time = time.time() -module.run() -out_shape = (input_size, 10) -# get output -out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy() -print("Time:", time.time() - start_time) \ No newline at end of file diff --git a/hpvm/projects/onnx/profile/prof.nvvp b/hpvm/projects/onnx/profile/prof.nvvp deleted file mode 100644 index be5cbd996b6980e9e4bad773fe9f3abea1ccb0f6..0000000000000000000000000000000000000000 Binary files a/hpvm/projects/onnx/profile/prof.nvvp and /dev/null differ diff --git a/hpvm/projects/onnx/specs.dump b/hpvm/projects/onnx/specs.dump deleted file mode 100644 index 3016325b9bb2f94c3d66f0185408cccfa7cf9a43..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/specs.dump +++ /dev/null @@ -1,1051 +0,0 @@ - -Computer -******** - - -Summary -------- - --Computer- -Processor : 40x Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz -Memory : 65830MB (28703MB used) -Operating System : Ubuntu 16.04.6 LTS -User Name : ys26 (Yuanjing Shi) -Date/Time : Tue 31 Mar 2020 07:33:16 PM CDT --Display- -Resolution : 0x0 pixels -OpenGL Renderer : Unknown -X11 Vendor : (null) --Multimedia- -Audio Adapter : HDA-Intel - HDA Intel PCH -Audio Adapter : HDA-Intel - HDA NVidia -Audio Adapter : HDA-Intel - HDA NVidia --Input Devices- - Power Button - Power Button - HDA Intel PCH Front Mic - HDA Intel PCH Rear Mic - HDA Intel PCH Line - HDA Intel PCH Line Out Front - HDA Intel PCH Line Out Surround - HDA Intel PCH Line Out CLFE - HDA Intel PCH Front Headphone - HDA NVidia HDMI/DP,pcm : 3= - HDA NVidia HDMI/DP,pcm : 7= - HDA NVidia HDMI/DP,pcm : 8= - HDA NVidia HDMI/DP,pcm : 9= - HDA NVidia HDMI/DP,pcm : 3= - HDA NVidia HDMI/DP,pcm : 7= - HDA NVidia HDMI/DP,pcm : 8= - HDA NVidia HDMI/DP,pcm : 9= - Dell Dell USB Keyboard - Logitech USB-PS/2 Optical Mouse --Printers- -No printers found --SCSI Disks- -ATA Samsung SSD 850 -ATA Samsung SSD 850 -Generic Ultra HS-SD/MMC - -Operating System ----------------- - --Version- -Kernel : Linux 4.15.0-66-generic (x86_64) -Compiled : #75~16.04.1-Ubuntu SMP Tue Oct 1 14:01:08 UTC 2019 -C Library : Unknown -Default C Compiler : GNU C Compiler version 5.4.0 20160609 (Ubuntu 5.4.0-6ubuntu1~16.04.11) -Distribution : Ubuntu 16.04.6 LTS --Current Session- -Computer Name : tyler -User Name : ys26 (Yuanjing Shi) -Home Directory : /home/ys26 -Desktop Environment : Terminal --Misc- -Uptime : 67 days, 16 hours and 1 minute -Load Average : 19.85, 20.81, 20.51 - -Kernel Modules --------------- - --Loaded Modules- -nvidia_uvm -nfsv3 -nfs_acl -nfs -lockd : NFS file locking service version 0.5. 
-grace -fscache : FS Cache Manager -nvram -video : ACPI Video Driver -msr : x86 generic MSR driver -xt_tcpudp : Xtables: TCP, UDP and UDP-Lite match -iptable_filter : iptables filter table -ip_tables : IPv4 packet filter -x_tables : {ip,ip6,arp,eb}_tables backend module -input_leds : Input -> LEDs Bridge -snd_hda_codec_hdmi : HDMI HD-audio codec -binfmt_misc -xfs : SGI XFS with ACLs, security attributes, realtime, no debug enabled -nvidia_drm -nvidia_modeset -nvidia -intel_rapl : Driver for Intel RAPL (Running Average Power Limit) -sb_edac : MC Driver for Intel Sandy Bridge and Ivy Bridge memory controllers - Ver: 1.1.2 -x86_pkg_temp_thermal : X86 PKG TEMP Thermal Driver -intel_powerclamp : Package Level C-state Idle Injection for Intel CPUs -coretemp : Intel Core temperature monitor -snd_hda_codec_realtek : Realtek HD-audio codec -kvm_intel -snd_hda_codec_generic : Generic HD-audio codec parser -kvm -snd_hda_intel : Intel HDA driver -snd_hda_codec : HDA codec core -snd_hda_core : HD-audio bus -snd_hwdep : Hardware dependent layer -irqbypass : IRQ bypass manager utility module -snd_pcm : Midlevel PCM code for ALSA. -intel_wmi_thunderbolt : Intel WMI Thunderbolt force power driver -intel_cstate -snd_seq_midi : Advanced Linux Sound Architecture sequencer MIDI synth. -intel_rapl_perf -snd_seq_midi_event : MIDI byte <-> sequencer event coder -drm_kms_helper : DRM KMS helper -snd_rawmidi : Midlevel RawMidi code for ALSA. -drm : DRM shared core routines -snd_seq : Advanced Linux Sound Architecture sequencer. -snd_seq_device : ALSA sequencer device management -ipmi_devintf : Linux device interface for the IPMI message handler. -snd_timer : ALSA timer interface -ipmi_msghandler : Incoming and outgoing message routing for an IPMI interface. -fb_sys_fops : Generic file read (fb in system RAM) -snd : Advanced Linux Sound Architecture driver for soundcards. -syscopyarea : Generic copyarea (sys-to-sys) -sysfillrect : Generic fill rectangle (sys-to-sys) -sysimgblt : 1-bit/8-bit to 1-32 bit color expansion (sys-to-sys) -lpc_ich : LPC interface for Intel ICH -soundcore : Core sound module -ioatdma -shpchp : Standard Hot Plug PCI Controller Driver -mac_hid -ib_iser : iSER (iSCSI Extensions for RDMA) Datamover -rdma_cm : Generic RDMA CM Agent -iw_cm : iWARP CM -ib_cm : InfiniBand CM -ib_core : core kernel InfiniBand API -iscsi_tcp : iSCSI/TCP data-path -libiscsi_tcp : iSCSI/TCP data-path -libiscsi : iSCSI library functions -scsi_transport_iscsi : iSCSI Transport Interface -parport_pc : PC-style parallel port driver -ppdev -lp -parport -sunrpc -autofs4 -btrfs -zstd_compress : Zstd Compressor -raid10 : RAID10 (striped mirror) personality for MD -raid456 : RAID4/5/6 (striping with parity) personality for MD -async_raid6_recov : asynchronous RAID-6 recovery api -async_memcpy : asynchronous memcpy api -async_pq : asynchronous raid6 syndrome generation/validation -async_xor : asynchronous xor/xor-zero-sum api -async_tx : Asynchronous Bulk Memory Transactions API -xor -raid6_pq : RAID6 Q-syndrome calculations -libcrc32c : CRC32c (Castagnoli) calculations -raid1 : RAID1 (mirroring) personality for MD -raid0 : RAID0 (striping) personality for MD -multipath : simple multi-path personality for MD -linear : Linear device concatenation personality for MD -uas -usb_storage : USB Mass Storage driver for Linux -hid_generic : HID generic driver -usbhid : USB HID core driver -hid -mxm_wmi : MXM WMI Driver -crct10dif_pclmul : T10 DIF CRC calculation accelerated with PCLMULQDQ. 
-crc32_pclmul -ghash_clmulni_intel : GHASH Message Digest Algorithm, acclerated by PCLMULQDQ-NI -pcbc : PCBC block cipher algorithm -igb : Intel(R) Gigabit Ethernet Network Driver -aesni_intel : Rijndael (AES) Cipher Algorithm, Intel AES-NI instructions optimized -dca -i2c_algo_bit : I2C-Bus bit-banging algorithm -aes_x86_64 : Rijndael (AES) Cipher Algorithm, asm optimized -ahci : AHCI SATA low-level driver -crypto_simd -ptp : PTP clocks support -glue_helper -cryptd : Software async crypto daemon -pps_core : LinuxPPS support (RFC 2783) - ver. 5.3.6 -libahci : Common AHCI SATA low-level routines -wmi : ACPI-WMI Mapping Driver - -Boots ------ - --Boots- -Fri Jan 24 02:31 : 44..15.0-66-generi|still -Mon Nov 18 18:04 : 44..15.0-66-generi|still -Fri Nov 1 1:52 : 44..15.0-66-generi|- -Fri Nov 1 1:25 : 44..15.0-66-generi|- - -Languages ---------- - --Available Languages- -en_US.utf8 : English locale for the USA - -Filesystems ------------ - --Mounted File Systems- -udev /dev 0.00 % (31.4 GiB of 31.4 GiB) -tmpfs /run 7.15 % (5.8 GiB of 6.3 GiB) -/dev/mapper/tyler--vg-root / 5.81 % (1720.1 GiB of 1826.1 GiB) -tmpfs /dev/shm 0.00 % (31.4 GiB of 31.4 GiB) -tmpfs /run/lock 0.08 % (5.0 MiB of 5.0 MiB) -tmpfs /sys/fs/cgroup 0.00 % (31.4 GiB of 31.4 GiB) -/dev/sdb1 /srv/local 9.99 % (1676.1 GiB of 1862.1 GiB) -/dev/sdb1 /nix 9.99 % (1676.1 GiB of 1862.1 GiB) -/dev/sda2 /boot 11.01 % (3.3 GiB of 3.7 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/deepanv2 /home/deepanv2 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/242233 0.00 % (6.3 GiB of 6.3 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/nz11 /home/nz11 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/1091162 0.00 % (6.3 GiB of 6.3 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/hsharif3 /home/hsharif3 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/641600 0.00 % (6.3 GiB of 6.3 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/bjschre2 /home/bjschre2 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/373498 0.00 % (6.3 GiB of 6.3 GiB) -/etc/autofs/cs_vadve/auto.direct /shared 84.56 % (1580.7 GiB of 10238.0 GiB) -engr-linux-siebl1.engr.illinois.edu:/srv/software/megacli-8.07 /software/megacli-8.07 95.35 % (92.6 GiB of 1992.1 GiB) -vadve-file-01.cs.illinois.edu:/srv/shared /shared 84.56 % (1580.7 GiB of 10238.0 GiB) -engr-linux-siebl1.engr.illinois.edu:/srv/software/cuda-9.1 /software/cuda-9.1 95.35 % (92.6 GiB of 1992.1 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/yifanz16 /home/yifanz16 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/1086025 0.00 % (6.3 GiB of 6.3 GiB) -vadve-file-01.cs.illinois.edu:/srv/home/ys26 /home/ys26 84.56 % (1580.7 GiB of 10238.0 GiB) -tmpfs /run/user/1055463 0.00 % (6.3 GiB of 6.3 GiB) - -Display -------- - --Display- -Resolution : 0x0 pixels -Vendor : (null) -Version : (null) --Monitors- --Extensions- - -Environment Variables ---------------------- - --Environment Variables- -MANPATH : /usr/share/lmod/lmod/share/man:: -NIX_PROFILES : /nix/var/nix/profiles/default /home/ys26/.nix-profile -XDG_SESSION_ID : 14482 -TERM : xterm-256color -SHELL : /bin/bash -DISTARCH : Linux-x86_64 -MODULEPATH_ROOT : /etc/modulefiles/software -SSH_CLIENT : 10.251.9.206 64268 22 -CONDA_SHLVL : 1 -CONDA_PROMPT_MODIFIER : (base) -LMOD_PKG : /usr/share/lmod/lmod -LMOD_VERSION : 7.7.14 -GTK_MODULES : gail:atk-bridge -SSH_TTY : /dev/pts/7 -USER : ys26 -LMOD_sys : Linux -CONDA_EXE : /home/ys26/anaconda3/bin/conda -_CE_CONDA -MAIL : /var/mail/ys26 -PATH : 
/home/ys26/anaconda3/bin:/home/ys26/anaconda3/condabin:/home/ys26/.nix-profile/bin:/nix/var/nix/profiles/default/bin:/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin:/usr/games:/usr/local/games:/snap/bin:/home/ys26/.local/bin:/home/ys26/bin -CONDA_PREFIX : /home/ys26/anaconda3 -NIX_PATH : nixpkgs=/nix/var/nix/profiles/per-user/root/channels/nixpkgs:/nix/var/nix/profiles/per-user/root/channels -PWD : /home/ys26 -LANG : en_US.UTF-8 -MODULEPATH : /etc/modulefiles/software/Linux:/etc/modulefiles/software/Core:/usr/share/lmod/lmod/modulefiles/Core:/etc/modulefiles/env:/etc/modulefiles/class:/etc/modulefiles/software -NIX_SSL_CERT_FILE : /etc/ssl/certs/ca-certificates.crt -LMOD_CMD : /usr/share/lmod/lmod/libexec/lmod -_CE_M -KRB5CCNAME : FILE:/tmp/krb5cc_1055463_itHNAb -SHLVL : 1 -HOME : /home/ys26 -LANGUAGE : en_US: -SHOST : tyler -BASH_ENV : /usr/share/lmod/lmod/init/bash -CONDA_PYTHON_EXE : /home/ys26/anaconda3/bin/python -LMOD_arch : x86_64 -LOGNAME : ys26 -XDG_DATA_DIRS : /usr/local/share:/usr/share:/var/lib/snapd/desktop -SOFTPATH : /software -SSH_CONNECTION : 10.251.9.206 64268 130.126.136.179 22 -MODULESHOME : /usr/share/lmod/lmod -CONDA_DEFAULT_ENV : base -LMOD_SETTARG_FULL_SUPPORT : no -ARCH : x86_64 -XDG_RUNTIME_DIR : /run/user/1055463 -LMOD_DIR : /usr/share/lmod/lmod/libexec -NIX_USER_PROFILE_DIR : /nix/var/nix/profiles/per-user/ys26 -BASH_FUNC_module%% : () { eval $($LMOD_CMD bash "$@") && eval $(${LMOD_SETTARG_CMD:-:} -s sh) - -Users ------ - --Users- -root : root -daemon : daemon -bin : bin -sys : sys -sync : sync -games : games -man : man -lp : lp -mail : mail -news : news -uucp : uucp -proxy : proxy -www-data : www-data -backup : backup -list : Mailing List Manager -irc : ircd -gnats : Gnats Bug-Reporting System (admin) -nobody : nobody -systemd-timesync : systemd Time Synchronization -systemd-network : systemd Network Management -systemd-resolve : systemd Resolver -systemd-bus-proxy : systemd Bus Proxy -syslog -_apt -messagebus -lxd -dnsmasq : dnsmasq -sshd -statd -postfix -colord : colord colour management daemon -ntp -clamav -sensu : Sensu Monitoring Framework -telegraf -lightdm : Light Display Manager -whoopsie -avahi-autoipd : Avahi autoip daemon -avahi : Avahi mDNS daemon -speech-dispatcher : Speech Dispatcher -hplip : HPLIP system user -kernoops : Kernel Oops Tracking Daemon -pulse : PulseAudio daemon -rtkit : RealtimeKit -saned -usbmux : usbmux daemon -nixbld1 : Nix build user 1 -nixbld2 : Nix build user 2 -nixbld3 : Nix build user 3 -nixbld4 : Nix build user 4 -nixbld5 : Nix build user 5 -nixbld6 : Nix build user 6 -nixbld7 : Nix build user 7 -nixbld8 : Nix build user 8 -nixbld9 : Nix build user 9 -nixbld10 : Nix build user 10 -nixbld11 : Nix build user 11 -nixbld12 : Nix build user 12 -nixbld13 : Nix build user 13 -nixbld14 : Nix build user 14 -nixbld15 : Nix build user 15 -nixbld16 : Nix build user 16 -nixbld17 : Nix build user 17 -nixbld18 : Nix build user 18 -nixbld19 : Nix build user 19 -nixbld20 : Nix build user 20 -nixbld21 : Nix build user 21 -nixbld22 : Nix build user 22 -nixbld23 : Nix build user 23 -nixbld24 : Nix build user 24 -nixbld25 : Nix build user 25 -nixbld26 : Nix build user 26 -nixbld27 : Nix build user 27 -nixbld28 : Nix build user 28 -nixbld29 : Nix build user 29 -nixbld30 : Nix build user 30 -nixbld31 : Nix build user 31 -nixbld32 : Nix build user 32 - -Devices -******* - - -Processor ---------- - --Processors- -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2599.98MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2602.97MHz -Intel(R) 
Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2456.45MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2601.63MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.07MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.02MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.03MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.19MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2695.21MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2715.75MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2715.46MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1424.73MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1787.70MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2711.77MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1353.80MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2710.79MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2709.20MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2642.89MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2449.84MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.05MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2599.95MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2600.04MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2706.53MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2675.88MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2792.21MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1358.61MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1748.81MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2611.28MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 1373.20MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2705.52MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2736.24MHz -Intel(R) Xeon(R) CPU E5-2640 v4 @ 2.40GHz : 2702.87MHz - -Memory ------- - --Memory- -Total Memory : 65830480 kB -Free Memory : 14031276 kB -MemAvailable : 38320820 kB -Buffers : 398312 kB -Cached : 23096348 kB -Cached Swap : 126052 kB -Active : 33001128 kB -Inactive : 14469604 kB -Active(anon) : 23204468 kB -Inactive(anon) : 830232 kB -Active(file) : 9796660 kB -Inactive(file) : 13639372 kB -Unevictable : 3684 kB -Mlocked : 3684 kB -Virtual Memory : 3997692 kB -Free Virtual Memory : 2660556 kB -Dirty : 136 kB -Writeback : 8 kB -AnonPages : 23971776 kB -Mapped : 1338980 kB -Shmem : 63004 kB -Slab : 3122640 kB -SReclaimable : 1516712 kB -SUnreclaim : 1605928 kB -KernelStack : 30624 kB -PageTables : 108936 kB -NFS_Unstable : 0 kB -Bounce : 0 kB -WritebackTmp : 0 kB -CommitLimit : 36912932 kB -Committed_AS : 64285764 kB -VmallocTotal : 34359738367 kB -VmallocUsed : 0 kB -VmallocChunk : 0 kB -HardwareCorrupted : 0 kB -AnonHugePages : 2934784 kB -ShmemHugePages : 0 kB -ShmemPmdMapped : 0 kB -CmaTotal : 0 kB -CmaFree : 0 kB -HugePages_Total : 0 -HugePages_Free : 0 -HugePages_Rsvd : 0 -HugePages_Surp : 0 -Hugepagesize : 2048 kB -DirectMap4k : 63504064 kB -DirectMap2M : 3469312 kB -DirectMap1G : 2097152 kB - -PCI Devices ------------ - --PCI Devices- -Host bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D DMI2 (rev 01) -PCI bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 1 (rev 01) (prog-if 00 [Normal decode]) -PCI 
bridge : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D PCI Express Root Port 3 (rev 01) (prog-if 00 [Normal decode]) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 1 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 2 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 3 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 4 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 5 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 6 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 7 (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Map/VTd_Misc/System Management (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO Hot Plug (rev 01) -System peripheral : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D IIO RAS/Control Status/Global Errors (rev 01) -PIC : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D I/O APIC (rev 01) (prog-if 20 [IO(X)-APIC]) - -USB Devices ------------ - - -Printers --------- - --Printers- -No printers found - -Battery -------- - --No batteries- -No batteries found on this system - -Sensors -------- - --Cooling Fans- --Temperatures- --Voltage Values- - -Input Devices -------------- - --Input Devices- - Power Button - Power Button - HDA Intel PCH Front Mic - HDA Intel PCH Rear Mic - HDA Intel PCH Line - HDA Intel PCH Line Out Front - HDA Intel PCH Line Out Surround - HDA Intel PCH Line Out CLFE - HDA Intel PCH Front Headphone - HDA NVidia HDMI/DP,pcm : 3= - HDA NVidia HDMI/DP,pcm : 7= - HDA NVidia HDMI/DP,pcm : 8= - HDA NVidia HDMI/DP,pcm : 9= - HDA NVidia HDMI/DP,pcm : 3= - HDA NVidia HDMI/DP,pcm : 7= - HDA NVidia HDMI/DP,pcm : 8= - HDA NVidia HDMI/DP,pcm : 9= - Dell Dell USB Keyboard - Logitech USB-PS/2 Optical Mouse - -Storage -------- - --SCSI Disks- -ATA Samsung SSD 850 -ATA Samsung SSD 850 -Generic Ultra HS-SD/MMC - -DMI ---- - --BIOS- -Date : 11/09/2016 -Vendor : American Megatrends Inc. 
(www.ami.com) -Version : 2.0a --Board- -Name : X10DAI -Vendor : Supermicro - -Resources ---------- - --I/O Ports- -<tt>0000-0000 </tt> : PCI Bus 0000:80 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt>0000-0000 </tt> : PCI Bus 0000:80 -<tt>0000-0000 </tt> : PCI Bus 0000:80 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 
1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) -<tt>0000-0000 </tt> : PCI Bus 0000:80 -<tt> 0000-0000 </tt> : PCI Bus 0000:81 -<tt> 0000-0000 </tt> : NVIDIA Corporation GP102 [GeForce GTX 1080 Ti] (rev a1) (prog-if 00 [VGA controller]) --Memory- -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 
00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ICH HD audio -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ioatdma 
-<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt>00000000-00000000 </tt> : PCI Bus 0000:80 -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma -<tt> 00000000-00000000 </tt> : Intel Corporation Xeon E7 v4/Xeon E5 v4/Xeon E3 v4/Xeon D Crystal Beach DMA Channel 0 (rev 01) -<tt> 00000000-00000000 </tt> : ioatdma --DMA- -<tt> 4</tt> : cascade - -Network -******* - - -Interfaces ----------- - --Network Interfaces- -eth0 4338573.20MiB 13789563.94MiB 130.126.136.179 -eth1 0.00MiB 
0.00MiB -lo 4132.02MiB 4132.02MiB 127.0.0.1 - -IP Connections --------------- - --Connections- -0.0.0.0:22 LISTEN 0.0.0.0:* tcp -127.0.0.1:3030 0.0.0.0:* udp -127.0.0.1:631 LISTEN 0.0.0.0:* tcp -127.0.0.1:3031 LISTEN 0.0.0.0:* tcp -127.0.0.1:8888 LISTEN 0.0.0.0:* tcp -127.0.0.1:25 LISTEN 0.0.0.0:* tcp -127.0.0.1:6010 ESTABLISHED 127.0.0.1:59498 tcp -0.0.0.0:445 LISTEN 0.0.0.0:* tcp -127.0.0.1:34409 LISTEN 0.0.0.0:* tcp -0.0.0.0:139 LISTEN 0.0.0.0:* tcp -0.0.0.0:47279 LISTEN 0.0.0.0:* tcp -0.0.0.0:111 0.0.0.0:* udp -0.0.0.0:39059 LISTEN 0.0.0.0:* tcp -127.0.1.1:53 0.0.0.0:* udp -127.0.0.1:6010 ESTABLISHED 127.0.0.1:59498 tcp -127.0.0.1:59498 ESTABLISHED 127.0.0.1:6010 tcp -130.126.136.179:765 ESTABLISHED 172.22.98.29:33920 tcp -130.126.136.179:40932 ESTABLISHED 172.22.246.11:4505 tcp -130.126.136.179:789 ESTABLISHED 130.126.112.49:2049 tcp -130.126.136.179:58444 ESTABLISHED 172.22.6.74:5672 tcp -130.126.136.179:37674 ESTABLISHED 172.22.6.146:8086 tcp -130.126.136.179:58446 ESTABLISHED 172.22.6.74:5672 tcp -130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp -130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp -130.126.136.179:45112 TIME_WAIT 172.22.98.29:111 tcp -130.126.136.179:58442 ESTABLISHED 172.22.6.74:5672 tcp -130.126.136.179:33326 ESTABLISHED 172.22.246.13:4505 tcp -130.126.136.179:59164 ESTABLISHED 172.22.246.140:4505 tcp -130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp -130.126.136.179:58440 ESTABLISHED 172.22.6.74:5672 tcp -130.126.136.179:745 ESTABLISHED 172.22.98.29:2049 tcp -130.126.136.179:51970 ESTABLISHED 172.22.246.14:10514 tcp -130.126.136.179:22 ESTABLISHED 10.251.18.206:65464 tcp -:::22 LISTEN :::* tcp6 -::1:631 LISTEN :::* tcp6 -::1:6010 LISTEN :::* tcp6 -:::445 LISTEN :::* tcp6 -:::44513 LISTEN :::* tcp6 -:::139 LISTEN :::* tcp6 -:::111 :::* udp6 -:::35349 LISTEN :::* tcp6 -0.0.0.0:59373 0.0.0.0:* udp -127.0.1.1:53 0.0.0.0:* udp -0.0.0.0:68 0.0.0.0:* udp -0.0.0.0:111 0.0.0.0:* udp -130.126.136.179:123 0.0.0.0:* udp -127.0.0.1:123 0.0.0.0:* udp -0.0.0.0:123 0.0.0.0:* udp -130.126.139.255:137 0.0.0.0:* udp -130.126.136.179:137 0.0.0.0:* udp -0.0.0.0:137 0.0.0.0:* udp -130.126.139.255:138 0.0.0.0:* udp -130.126.136.179:138 0.0.0.0:* udp -0.0.0.0:138 0.0.0.0:* udp -0.0.0.0:631 0.0.0.0:* udp -127.0.0.1:762 0.0.0.0:* udp -0.0.0.0:773 0.0.0.0:* udp -0.0.0.0:34156 0.0.0.0:* udp -0.0.0.0:34688 0.0.0.0:* udp -127.0.0.1:3030 0.0.0.0:* udp -0.0.0.0:37696 0.0.0.0:* udp -0.0.0.0:5353 0.0.0.0:* udp -0.0.0.0:42410 0.0.0.0:* udp -:::111 :::* udp6 -2620:0:e00:550a:5c4:123 :::* udp6 -2620:0:e00:550a:4cd:123 :::* udp6 -2620:0:e00:550a:f5e:123 :::* udp6 -2620:0:e00:550a:c5b:123 :::* udp6 -2620:0:e00:550a:cd1:123 :::* udp6 -2620:0:e00:550a:dcf:123 :::* udp6 -2620:0:e00:550a:d8d:123 :::* udp6 -fe80::ec4:7aff:fedc:123 :::* udp6 -2620:0:e00:550a:ec4:123 :::* udp6 -::1:123 :::* udp6 -:::123 :::* udp6 -:::773 :::* udp6 -:::37708 :::* udp6 -:::5353 :::* udp6 -:::39262 :::* udp6 -:::47288 :::* udp6 -:::55076 :::* udp6 - -Routing Table -------------- - --IP routing table- -0.0.0.0 / 130.126.136.1 0.0.0.0 UG eth0 -130.126.136.0 / 0.0.0.0 255.255.252.0 U eth0 -169.254.0.0 / 0.0.0.0 255.255.0.0 U eth0 -192.17.2.10 / 130.126.136.1 255.255.255.255 UGH eth0 - -ARP Table ---------- - --ARP Table- -130.126.136.121 90:b1:1c:2d:31:0e eth0 -130.126.136.198 40:a8:f0:47:38:97 eth0 -130.126.136.1 00:fe:c8:5d:df:ff eth0 -130.126.136.110 98:10:e8:f3:a2:4f eth0 -130.126.136.247 00:11:32:c3:57:3f eth0 -130.126.136.159 c4:34:6b:5e:26:eb eth0 -130.126.136.243 00:4e:01:bf:b6:ea eth0 -130.126.136.77 
bc:30:5b:e0:f9:06 eth0 -130.126.139.44 98:90:96:dc:55:d9 eth0 -130.126.136.14 4c:d9:8f:17:1c:e2 eth0 -130.126.136.73 18:66:da:31:06:6b eth0 -130.126.136.63 18:66:da:3d:e2:68 eth0 -169.254.169.254 00:fe:c8:5d:df:ff eth0 -130.126.136.126 20:04:0f:f4:66:74 eth0 -130.126.139.118 70:85:c2:83:33:0b eth0 -130.126.136.210 2c:4d:54:46:4f:ed eth0 -130.126.136.195 48:4d:7e:f5:03:b2 eth0 -130.126.136.55 40:a8:f0:5e:b9:9f eth0 -130.126.136.118 90:b1:1c:1a:02:d6 eth0 -130.126.136.156 04:d4:c4:5b:50:f4 eth0 -130.126.136.240 e4:54:e8:79:44:68 eth0 -130.126.136.162 88:51:fb:58:dc:b2 eth0 -130.126.136.148 6c:2b:59:d5:f9:d2 eth0 -130.126.136.11 8c:dc:d4:29:8d:28 eth0 -130.126.136.74 c8:2a:14:1e:91:67 eth0 -130.126.138.79 74:e6:e2:da:8b:93 eth0 -130.126.136.7 18:03:73:3c:2e:f1 eth0 -130.126.136.129 00:25:00:ed:73:7d eth0 -130.126.136.245 00:04:4b:dd:43:42 eth0 -130.126.136.119 90:b1:1c:2d:2a:90 eth0 -130.126.136.182 90:b1:1c:5e:2c:72 eth0 -130.126.136.241 e4:54:e8:79:52:be eth0 -130.126.136.100 64:00:6a:5f:2e:25 eth0 -130.126.139.197 6c:3b:e5:14:6c:8f eth0 -130.126.136.12 54:bf:64:99:59:02 eth0 -130.126.136.23 8c:ec:4b:b2:4f:c9 eth0 -130.126.136.201 fc:aa:14:2f:a7:ab eth0 -130.126.136.191 40:6c:8f:24:b1:a4 eth0 -130.126.138.206 18:66:da:0d:6f:d0 eth0 -130.126.136.120 90:b1:1c:2d:2e:f8 eth0 -130.126.136.46 48:4d:7e:e6:a4:16 eth0 -130.126.138.135 38:c9:86:02:0b:99 eth0 -130.126.136.105 d0:94:66:7b:67:c0 eth0 -130.126.136.221 f8:b1:56:c6:32:86 eth0 -130.126.136.95 00:24:e8:44:aa:a7 eth0 -130.126.136.28 ac:1f:6b:48:8a:84 eth0 -130.126.136.242 00:4e:01:bf:6c:ca eth0 -130.126.136.76 18:66:da:00:bb:74 eth0 -130.126.136.97 98:90:96:bc:6f:66 eth0 -130.126.136.160 ac:87:a3:1f:4e:ca eth0 -130.126.136.9 6c:2b:59:c7:ad:19 eth0 -130.126.136.72 50:9a:4c:59:c3:5a eth0 -130.126.136.209 18:66:da:30:64:45 eth0 -130.126.136.83 00:13:3b:11:6a:a1 eth0 -130.126.136.5 4c:d9:8f:0c:1e:02 eth0 - -DNS Servers ------------ - --Name servers- -127.0.1.1 - -Statistics ----------- - --IP- -3340235186 : Total packets received -99269 : With invalid addresses -0 : Incoming packets discarded -0 : Incoming packets discarded -3340132854 : Incoming packets delivered -1109643438 : Requests sent out -16 : Outgoing packets dropped -248929 : Dropped because of missing route -6126 : Reassemblies required -3063 : Packets reassembled ok --ICMP- -1355 : ICMP messages received -19 : Input ICMP message failed. -7599 : ICMP messages sent -0 : ICMP messages failed --ICMPMSG- --TCP- -26139644 : Active connections openings -3919 : Passive connection openings -25871410 : Failed connection attempts -4562 : Connection resets received -18 : Connections established -3302346907 : Segments received -10359040645 : Segments send out -2844431 : Segments retransmited -1 : Bad segments received. -25878380 : Resets sent --UDP- -24979118 : Packets received -6362 : Packets to unknown port received. 
-4203 : Packet receive errors -662177 : Packets sent --UDPLITE- --TCPEXT- -1 : Resets received for embryonic SYN_RECV sockets -235858 : TCP sockets finished time wait in fast timer -171 : Time wait sockets recycled by time stamp -1825 : Packets rejects in established connections because of timestamp -6984717 : Delayed acks sent -3194 : Delayed acks further delayed because of locked socket -527487290 : Packet headers predicted -543016884 : Acknowledgments not containing data payload received -1893599141 : Predicted acknowledgments -554129 : Times recovered from packet loss by selective acknowledgements -119 : Congestion windows fully recovered without slow start -85 : Congestion windows partially recovered using Hoe heuristic -6456 : Congestion windows recovered without slow start by DSACK -1683 : Congestion windows recovered without slow start after partial ack -101 : Timeouts after SACK recovery -136 : Timeouts in loss state -2738885 : Fast retransmits -2730 : Retransmits in slow start -3141 : Other TCP timeouts -820 : SACK retransmits failed -32589 : DSACKs sent for old packets -607 : DSACKs sent for out of order packets -84876 : DSACKs received -567 : DSACKs for out of order packets received -650 : Connections reset due to unexpected data -4384 : Connections reset due to early user close -286 : Connections aborted due to timeout -3 : Times unabled to send RST due to no memory --IPEXT- - -Shared Directories ------------------- - --SAMBA- --NFS- - -Benchmarks -********** - - -CPU Blowfish ------------- - --CPU Blowfish- -<big><b>This Machine</b></big> 2600 MHz 0.665 -Intel(R) Celeron(R) M processor 1.50GHz (null) 26.1876862 -PowerPC 740/750 (280.00MHz) (null) 172.816713 - -CPU CryptoHash --------------- - --CPU CryptoHash- -<big><b>This Machine</b></big> 2600 MHz 1697.719 - -CPU Fibonacci -------------- - --CPU Fibonacci- -<big><b>This Machine</b></big> 2600 MHz 2.166 -Intel(R) Celeron(R) M processor 1.50GHz (null) 8.1375674 -PowerPC 740/750 (280.00MHz) (null) 58.07682 - -CPU N-Queens ------------- - --CPU N-Queens- -<big><b>This Machine</b></big> 2600 MHz 0.592 - -FPU FFT -------- - --FPU FFT- -<big><b>This Machine</b></big> 2600 MHz 1.072 - -FPU Raytracing --------------- - --FPU Raytracing- -<big><b>This Machine</b></big> 2600 MHz 6.178 -Intel(R) Celeron(R) M processor 1.50GHz (null) 40.8816714 -PowerPC 740/750 (280.00MHz) (null) 161.312647 diff --git a/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb b/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb deleted file mode 100644 index bbac5d491d80f5f4aaa9286b2323704e55555b48..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/.ipynb_checkpoints/mnist-checkpoint.ipynb +++ /dev/null @@ -1,105 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import numpy as np\n", - "import onnx\n", - "import glob\n", - "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n", - "\n", - "from onnx import numpy_helper\n", - "\n", - "# onnx2hpvm modules\n", - "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "model = onnx.load('../models/mnist/mnist.onnx')\n", - "test_data_dir = '../models/mnist/test_data_set_0'" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ 
- "1\n" - ] - } - ], - "source": [ - "# Load inputs\n", - "inputs = []\n", - "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n", - "print(inputs_num)\n", - "for i in range(inputs_num):\n", - " input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n", - " tensor = onnx.TensorProto()\n", - " with open(input_file, 'rb') as f:\n", - " tensor.ParseFromString(f.read())\n", - " inputs.append(numpy_helper.to_array(tensor))\n", - "\n", - "# Load reference outputs\n", - "ref_outputs = []\n", - "ref_outputs_num = len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n", - "for i in range(ref_outputs_num):\n", - " output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n", - " tensor = onnx.TensorProto()\n", - " with open(output_file, 'rb') as f:\n", - " tensor.ParseFromString(f.read())\n", - " ref_outputs.append(numpy_helper.to_array(tensor))\n", - "\n", - "# Run the model on the backend\n", - "outputs = list(backend.run_model(model, inputs))\n", - "\n", - "#from_onnx_to_hpvm(model)\n", - "# Compare the results with reference outputs.\n", - "#for ref_o, o in zip(ref_outputs, outputs):\n", - "# np.testing.assert_almost_equal(ref_o, o)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/alexnet_keras.ipynb b/hpvm/projects/onnx/src/alexnet_keras.ipynb deleted file mode 100644 index 3e46e220eb5438188d19e06934c6a9ef780cfa21..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/alexnet_keras.ipynb +++ /dev/null @@ -1,356 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras.models import Sequential\n", - "from keras.layers.core import Dense, Dropout, Flatten, Activation\n", - "from keras.layers.convolutional import Conv2D\n", - "from keras.optimizers import Adam\n", - "from keras.layers.pooling import MaxPooling2D\n", - "from keras.utils.np_utils import to_categorical\n", - "from keras.preprocessing.image import ImageDataGenerator\n", - "from keras import backend as K\n", - "from keras import regularizers\n", - "from keras.callbacks import LearningRateScheduler\n", - "import sys\n", - "import struct\n", - "import keras\n", - "import numpy as np\n", - "import os\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "def buildModel2():\n", - "\n", - " activation_type = \"tanh\"\n", - " weight_decay = 1e-4\n", - " \n", - " model = Sequential()\n", - " model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type,\n", - " input_shape=(3, 32, 32), padding = 'same',\n", - " kernel_regularizer=regularizers.l2(weight_decay) ))\n", - " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", - " model.add(Dropout(0.2))\n", - " model.add(Conv2D(192, kernel_size=(5, 
5), activation=activation_type, padding = 'same',\n", - " kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", - " model.add(Dropout(0.3))\n", - "\n", - " model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", - " kernel_regularizer=regularizers.l2(weight_decay) )) \n", - " model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", - " kernel_regularizer=regularizers.l2(weight_decay) ))\n", - " model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same',\n", - " kernel_regularizer=regularizers.l2(weight_decay) ))\n", - " model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) ))\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Flatten())\n", - " #model.add(Flatten())\n", - " #model.add(Dense(256))\n", - " model.add(Dense(10))\n", - " model.add(Activation('softmax'))\n", - " \n", - " return model" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def lr_schedule(epoch):\n", - " lrate = 0.001\n", - " if epoch > 20:\n", - " lrate = 0.0005\n", - " if epoch > 40:\n", - " lrate = 0.0003\n", - " if epoch > 60:\n", - " lrate = 0.0001\n", - " if epoch > 80:\n", - " lrate = 0.00005 \n", - " \n", - " return lrate" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "def trainModel(model): \n", - " #dir_prefix = \"/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/\"\n", - "\n", - " #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6)\n", - " # Compile the model\n", - " model.compile(loss='categorical_crossentropy',\n", - " optimizer=Adam(lr=0.0001, decay=1e-6),\n", - " #optimizer = opt_rms,\n", - " metrics=['accuracy'])\n", - "\n", - " #print to_categorical(Y_train, 10)\n", - " print (to_categorical(Y_train))\n", - "\n", - "\n", - " datagen = ImageDataGenerator(\n", - " rotation_range=15,\n", - " width_shift_range=0.1,\n", - " height_shift_range=0.1,\n", - " horizontal_flip=True,\n", - " )\n", - " datagen.fit(X_train)\n", - "\n", - " \n", - " model.fit(X_train, to_categorical(Y_train, 10),\n", - " batch_size=128,\n", - " shuffle=True,\n", - " epochs = 1,\n", - " #epochs=100,\n", - " validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)])\n", - "\n", - " # Evaluate the model\n", - " scores = model.evaluate(X_test.astype('float32'), to_categorical(Y_test, 10))\n", - "\n", - " print('Loss: %.3f' % scores[0])\n", - " print('Accuracy: %.3f' % scores[1])\n", - " \n", - " print (\"*** TRAINED MODEL ****\\n\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_first')\n", - "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", - "test_labels = Y_test\n", - "train_labels = Y_train\n", - "\n", - "#X_train = X_train.astype('float32')\n", - "#X_test = X_test.astype('float32')\n", - "X_train = X_train / 255.0\n", - "X_test = X_test / 255.0\n", - "\n", - "mean = np.mean(X_train,axis=(0,1,2,3))\n", - "std = np.std(X_train,axis=(0,1,2,3)) \n", - "X_train = (X_train-mean)/(std+1e-7)\n", - "X_test = (X_test-mean)/(std+1e-7)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "X_train = X_train[:8000]\n", - "X_test = X_test[:8000]\n", - "Y_train = Y_train[:8000]\n", - "Y_test = 
Y_test[:8000]" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "[[0. 0. 0. ... 0. 0. 0.]\n", - " [0. 0. 0. ... 0. 0. 1.]\n", - " [0. 0. 0. ... 0. 0. 1.]\n", - " ...\n", - " [0. 0. 0. ... 0. 0. 1.]\n", - " [0. 1. 0. ... 0. 0. 0.]\n", - " [0. 1. 0. ... 0. 0. 0.]]\n", - "WARNING:tensorflow:Variable *= will be deprecated. Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n", - "Train on 50000 samples, validate on 10000 samples\n", - "Epoch 1/1\n", - "50000/50000 [==============================] - 12s 232us/step - loss: 2.0118 - acc: 0.3704 - val_loss: 1.8003 - val_acc: 0.4312\n", - "10000/10000 [==============================] - 1s 150us/step\n", - "Loss: 1.800\n", - "Accuracy: 0.431\n", - "*** TRAINED MODEL ****\n", - "\n" - ] - } - ], - "source": [ - "model = buildModel2()\n", - "trainModel(model)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 1.0208089351654053\n" - ] - } - ], - "source": [ - "import time\n", - "start = time.time()\n", - "keras_result = model.predict(X_test[:8000])\n", - "print(\"time:\", time.time() - start)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import keras2onnx\n", - "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", - "import onnx\n", - "onnx.save(onnx_model, \"../models/keras/alexnet8k.onnx\")\n", - "import pickle\n", - "with open('keras_dump', 'wb') as fp:\n", - " pickle.dump(keras_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#onnx_model = keras2onnx.convert_keras(model, model.name)\n", - "import onnxruntime\n", - "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "input_name = sess.get_inputs()[0].name\n", - "print(\"Input name :\", input_name)\n", - "input_shape = sess.get_inputs()[0].shape\n", - "print(\"Input shape :\", input_shape)\n", - "input_type = sess.get_inputs()[0].type\n", - "print(\"Input type :\", input_type)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "output_name = sess.get_outputs()[0].name\n", - "print(\"Output name :\", output_name) \n", - "output_shape = sess.get_outputs()[0].shape\n", - "print(\"Output shape :\", output_shape)\n", - "output_type = sess.get_outputs()[0].type\n", - "print(\"Output type :\", output_type)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "\n", - "#K.clear_session()\n", - "start = time.time()\n", - "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')})\n", - "import pickle\n", - "\n", - "with open('dumps/ort_dump', 'wb') as fp:\n", - " pickle.dump(ort_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('ort_dump', 'rb') as fp:\n", - " ort_res = pickle.load(fp)\n", - "with open ('keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(ort_res[0], keras_res):\n", - " 
np.testing.assert_almost_equal(ref_o, o, 5) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "print(ort_res[0])\n", - "print(\"--------------\")\n", - "print(keras_res)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/alexnet_onnx.ipynb b/hpvm/projects/onnx/src/alexnet_onnx.ipynb deleted file mode 100644 index 73755ecf1ccc27eedc5f1319c20e30473936c3e8..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/alexnet_onnx.ipynb +++ /dev/null @@ -1,192 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "import numpy as np\n", - "import os\n", - "import keras2onnx\n", - "import onnx\n", - "import onnxruntime\n", - "import time\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_first')\n", - "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", - "test_labels = Y_test\n", - "train_labels = Y_train\n", - "\n", - "#X_train = X_train.astype('float32')\n", - "#X_test = X_test.astype('float32')\n", - "X_train = X_train / 255.0\n", - "X_test = X_test / 255.0\n", - "\n", - "mean = np.mean(X_train,axis=(0,1,2,3))\n", - "std = np.std(X_train,axis=(0,1,2,3)) \n", - "X_train = (X_train-mean)/(std+1e-7)\n", - "X_test = (X_test-mean)/(std+1e-7)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "sess = onnxruntime.InferenceSession(\"../models/keras/alexnet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input name : conv2d_1_input\n", - "Input shape : [None, 3, 32, 32]\n", - "Input type : tensor(float)\n" - ] - } - ], - "source": [ - "input_name = sess.get_inputs()[0].name\n", - "print(\"Input name :\", input_name)\n", - "input_shape = sess.get_inputs()[0].shape\n", - "print(\"Input shape :\", input_shape)\n", - "input_type = sess.get_inputs()[0].type\n", - "print(\"Input type :\", input_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Output name : activation_1\n", - "Output shape : [None, 10]\n", - "Output type : tensor(float)\n" - ] - } - ], - "source": [ - "output_name = sess.get_outputs()[0].name\n", - "print(\"Output name :\", output_name) \n", - "output_shape = sess.get_outputs()[0].shape\n", - "print(\"Output shape :\", output_shape)\n", - "output_type = sess.get_outputs()[0].type\n", - "print(\"Output type :\", output_type)" - ] - }, - { - 
"cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 1.3165459632873535\n" - ] - } - ], - "source": [ - "#X_test = np.moveaxis(X_test, -1, 1)\n", - "X_test.shape\n", - "#K.clear_session()\n", - "start_time = time.time()\n", - "ort_result = sess.run([output_name], {input_name: X_test.astype('float32')[:8000]})\n", - "print(\"time: \", time.time() - start_time)\n", - "import pickle\n", - "\n", - "with open('dumps/ort_dump', 'wb') as fp:\n", - " pickle.dump(ort_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 20, - "metadata": {}, - "outputs": [ - { - "ename": "FileNotFoundError", - "evalue": "[Errno 2] No such file or directory: 'dumps/keras_dump'", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mFileNotFoundError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-20-a59ffee1b5dd>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/ort_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0mort_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0;32mwith\u001b[0m \u001b[0mopen\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0;34m'dumps/keras_dump'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'rb'\u001b[0m\u001b[0;34m)\u001b[0m \u001b[0;32mas\u001b[0m \u001b[0mfp\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0mkeras_res\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfp\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mFileNotFoundError\u001b[0m: [Errno 2] No such file or directory: 'dumps/keras_dump'" - ] - } - ], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/ort_dump', 'rb') as fp:\n", - " ort_res = pickle.load(fp)\n", - "with open ('dumps/keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(ort_res[0], keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/alexnet_tvm.ipynb b/hpvm/projects/onnx/src/alexnet_tvm.ipynb deleted file mode 100644 index eaa92800e5ef55e1de64cbbde9395872727a4e5b..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/alexnet_tvm.ipynb +++ /dev/null @@ 
-1,225 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "import sys\n", - "import struct\n", - "import numpy as np\n", - "import os\n", - "import tvm\n", - "import tvm.relay as relay\n", - "from tvm.contrib import graph_runtime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_last')\n", - "(X_train, Y_train), (X_test, Y_test) = cifar10.load_data()\n", - "X_test = X_test / 255.0\n", - "mean = np.mean(X_train,axis=(0,1,2,3))\n", - "std = np.std(X_train,axis=(0,1,2,3)) \n", - "X_test = (X_test-mean)/(std+1e-7)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#print(X_test.shape)\n", - "#X_test = X_test[:8000]\n", - "#print(X_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "import onnx\n", - "onnx_model = onnx.load(\"../models/keras/alexnet_last.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target='cuda -libs=cudnn,cublas'\n", - "input_name = 'conv2d_8_input'\n", - "shape_dict = {input_name: X_test[:1000].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "ctx = tvm.gpu()\n", - "with relay.build_config(opt_level=3):\n", - " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# LLVM EXECUTE SUCCEEDED\n", - "import time\n", - "start_time = time.time()\n", - "tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')[:1000]), **params)\n", - "print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", - "import pickle\n", - "with open('dumps/tvm_dump', 'wb') as fp:\n", - " pickle.dump(tvm_output, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 4096, 'float32'), (10, 4096, 'float32'), 0, 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 384, 8, 8, 'float32'), (256, 384, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 192, 8, 8, 'float32'), (384, 192, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 16, 16, 'float32'), (192, 64, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 3, 32, 32, 'float32'), (64, 3, 11, 11, 'float32'), (1, 1), (5, 5), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n" - ] - } - ], - "source": [ - "input_name = 'conv2d_8_input'\n", - "input_size = 8000\n", - "shape_dict = {input_name: X_test[:input_size].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "target = 'cuda -libs=cudnn,cublas'\n", - "with relay.build_config(opt_level=3):\n", - " graph, lib, params = relay.build(mod, target, params=params)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "ename": "TVMError", - "evalue": "Traceback (most recent call last):\n [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - 
"\u001b[0;31mTVMError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m<ipython-input-6-0bd1741a62f4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m\u001b[0m\n\u001b[1;32m 3\u001b[0m \u001b[0;31m#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 4\u001b[0m \u001b[0;31m# create module\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 5\u001b[0;31m \u001b[0mmodule\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mgraph_runtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlib\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 6\u001b[0m \u001b[0;31m# set input and parameters\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 7\u001b[0m \u001b[0mmodule\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mset_input\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"conv2d_8_input\"\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mX_test\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0minput_size\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mastype\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"float32\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/tvm/python/tvm/contrib/graph_runtime.py\u001b[0m in \u001b[0;36mcreate\u001b[0;34m(graph_json_str, libmod, ctx)\u001b[0m\n\u001b[1;32m 57\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 58\u001b[0m \u001b[0mfcreate\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mget_global_func\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"tvm.graph_runtime.create\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m---> 59\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mGraphModule\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfcreate\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mgraph_json_str\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m*\u001b[0m\u001b[0mdevice_type_id\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 60\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 61\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0mget_device_ctx\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mlibmod\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctx\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;32m~/tvm/python/tvm/_ffi/_ctypes/function.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mhandle\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mvalues\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtcodes\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mctypes\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mc_int\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mnum_args\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 206\u001b[0m ctypes.byref(ret_val), ctypes.byref(ret_tcode)) != 0:\n\u001b[0;32m--> 207\u001b[0;31m \u001b[0;32mraise\u001b[0m \u001b[0mget_last_ffi_error\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 208\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtemp_args\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 209\u001b[0m \u001b[0m_\u001b[0m \u001b[0;34m=\u001b[0m 
\u001b[0margs\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", - "\u001b[0;31mTVMError\u001b[0m: Traceback (most recent call last):\n [bt] (7) /home/ys26/tvm/build/libtvm.so(TVMFuncCall+0x61) [0x7f664ddc32b1]\n [bt] (6) /home/ys26/tvm/build/libtvm.so(+0xbecd74) [0x7f664de16d74]\n [bt] (5) /home/ys26/tvm/build/libtvm.so(+0xbecc19) [0x7f664de16c19]\n [bt] (4) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntimeCreate(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module const&, std::vector<DLContext, std::allocator<DLContext> > const&)+0xcf) [0x7f664de16a1f]\n [bt] (3) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::Init(std::__cxx11::basic_string<char, std::char_traits<char>, std::allocator<char> > const&, tvm::runtime::Module, std::vector<DLContext, std::allocator<DLContext> > const&)+0x258) [0x7f664de16698]\n [bt] (2) /home/ys26/tvm/build/libtvm.so(tvm::runtime::GraphRuntime::SetupStorage()+0x65f) [0x7f664de1334f]\n [bt] (1) /home/ys26/tvm/build/libtvm.so(tvm::runtime::NDArray::Empty(std::vector<long, std::allocator<long> >, DLDataType, DLContext)+0x1ef) [0x7f664dd9fcaf]\n [bt] (0) /home/ys26/tvm/build/libtvm.so(tvm::runtime::CUDADeviceAPI::AllocDataSpace(DLContext, unsigned long, unsigned long, DLDataType)+0xde1) [0x7f664de226d1]\n File \"/home/ys26/tvm/src/runtime/cuda/cuda_device_api.cc\", line 119\nCUDA: Check failed: e == cudaSuccess || e == cudaErrorCudartUnloading: out of memory" - ] - } - ], - "source": [ - "import time\n", - "ctx = tvm.gpu()\n", - "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", - "# create module\n", - "module = graph_runtime.create(graph, lib, ctx)\n", - "# set input and parameters\n", - "module.set_input(\"conv2d_8_input\", X_test[:input_size].astype(\"float32\"))\n", - "module.set_input(**params)\n", - "# run\n", - "start_time = time.time()\n", - "module.run()\n", - "out_shape = (input_size, 10)\n", - "# get output\n", - "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", - "print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/tvm_dump', 'rb') as fp:\n", - " tvm_res = pickle.load(fp)\n", - "with open ('dumps/keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(tvm_res, keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test\n", - "print(\"Accuracy matched!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/alexnet_tvm.py b/hpvm/projects/onnx/src/alexnet_tvm.py deleted file mode 100644 index dbbe8c6f8cb5f4e0cb98a467784ded7cfb384d31..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/alexnet_tvm.py +++ /dev/null @@ -1,40 +0,0 @@ -import numpy as np -from keras.datasets import cifar10 -from keras import backend as K -import sys -import struct -import numpy as np -import 
os -import tvm -import tvm.relay as relay -from tvm.contrib import graph_runtime -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -K.set_image_data_format('channels_last') -(X_train, Y_train), (X_test, Y_test) = cifar10.load_data() -test_labels = Y_test -train_labels = Y_train - -#X_train = X_train.astype('float32') -#X_test = X_test.astype('float32') -X_train = X_train / 255.0 -X_test = X_test / 255.0 - -mean = np.mean(X_train,axis=(0,1,2,3)) -std = np.std(X_train,axis=(0,1,2,3)) -X_train = (X_train-mean)/(std+1e-7) -X_test = (X_test-mean)/(std+1e-7) - -import onnx -onnx_model = onnx.load("../models/keras/alexnet_last.onnx") - -target = tvm.target.cuda() -input_name = 'conv2d_8_input' -shape_dict = {input_name: X_test.shape} -mod, params = relay.frontend.from_onnx(onnx_model, shape_dict) -ctx = tvm.gpu(0) -with relay.build_config(opt_level=3): - executor = relay.build_module.create_executor('graph', mod, ctx, target) - -dtype = 'float32' -tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')), **params) \ No newline at end of file diff --git a/hpvm/projects/onnx/src/keras_models/alexnet.py b/hpvm/projects/onnx/src/keras_models/alexnet.py deleted file mode 100644 index b562c7d658969c8efe8dca20a0502475268a7af2..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/alexnet.py +++ /dev/null @@ -1,174 +0,0 @@ - -import numpy as np -from keras.datasets import cifar10 -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Flatten, Activation -from keras.layers.convolutional import Conv2D -from keras.optimizers import Adam -from keras.layers.pooling import MaxPooling2D -from keras.utils.np_utils import to_categorical -from keras.preprocessing.image import ImageDataGenerator -from keras import backend as K -from keras import regularizers -from keras.callbacks import LearningRateScheduler -import sys -import struct -import keras -import numpy as np -import os - - - -def lr_schedule(epoch): - lrate = 0.001 - if epoch > 20: - lrate = 0.0005 - if epoch > 40: - lrate = 0.0003 - if epoch > 60: - lrate = 0.0001 - if epoch > 80: - lrate = 0.00005 - - return lrate - - - -def buildModel2(): - - activation_type = "tanh" - weight_decay = 1e-4 - - model = Sequential() - model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, - input_shape=(3, 32, 32), padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.2)) - model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay))) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.3)) - - model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.4)) - - model.add(Flatten()) - #model.add(Flatten()) - #model.add(Dense(256)) - model.add(Dense(10)) - model.add(Activation('softmax')) - - return model - - - -def buildModel(): - - model = Sequential() - model.add(Conv2D(128, kernel_size=(3, 3), activation='tanh', input_shape=(3, 32, 32), padding 
= 'same')) - model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) - model.add(MaxPooling2D(pool_size=(2, 2))) - #model.add(Dropout(0.25)) - - model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) - model.add(MaxPooling2D(pool_size=(2, 2))) - #model.add(Dropout(0.25)) - - model.add(Flatten()) - #model.add(Flatten()) - model.add(Dense(4096, activation='tanh')) - #model.add(Dropout(0.5)) - model.add(Dense(2048, activation='tanh')) - model.add(Dense(10, activation='tanh')) - model.add(Activation('softmax')) - - return model - - - - -def trainModel(model): - - (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - test_labels = Y_test - train_labels = Y_train - - #X_train = X_train.astype('float32') - #X_test = X_test.astype('float32') - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train,axis=(0,1,2,3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - - dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" - - #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) - # Compile the model - model.compile(loss='categorical_crossentropy', - optimizer=Adam(lr=0.0001, decay=1e-6), - #optimizer = opt_rms, - metrics=['accuracy']) - - #print to_categorical(Y_train, 10) - print (to_categorical(Y_train)) - - - datagen = ImageDataGenerator( - rotation_range=15, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, - ) - datagen.fit(X_train) - - - model.fit(X_train, to_categorical(Y_train, 10), - batch_size=128, - shuffle=True, - epochs = 1, - #epochs=100, - validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)]) - - # Evaluate the model - scores = model.evaluate(X_test, to_categorical(Y_test, 10)) - - print('Loss: %.3f' % scores[0]) - print('Accuracy: %.3f' % scores[1]) - - print ("*** TRAINED MODEL ****\n") - - - #dumpCalibrationData("calibration_data/alexnet_calib.bin", X_train, - # "calibration_data/alexnet_train_labels.bin", train_labels) - - #translate_to_approxhpvm(model, "data/alexnet_cifar10/", X_test, test_labels, 10) - - - -if __name__ == "__main__": - - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - # Changing to NCHW format - K.set_image_data_format('channels_first') - - model = buildModel2() - trainModel(model) - import keras2onnx - onnx_model = keras2onnx.convert_keras(model, model.name) - import onnx - onnx.save(onnx_model, "../models/keras/alexnet.onnx") diff --git a/hpvm/projects/onnx/src/keras_models/alexnet2.py b/hpvm/projects/onnx/src/keras_models/alexnet2.py deleted file mode 100644 index 812b212165666370092a3d55e8482643b550f830..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/alexnet2.py +++ /dev/null @@ -1,243 +0,0 @@ - -import keras -from keras.models import Sequential -from keras.utils import np_utils -from keras.preprocessing.image import ImageDataGenerator -from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization -from keras.layers import Conv2D, MaxPooling2D -from keras.datasets import cifar10 -from keras import regularizers -from keras.callbacks import LearningRateScheduler -import numpy as np -import os -import 
struct -from keras import backend as K -from approxhpvm_translator import translate_to_approxhpvm - - - -def dumpWeights(file_name, weights, N, H, W, C): - # NOTE: Writing the NHWC weights array as NCHW - f = open(file_name, "wb") - for i in range(N): - for j in range(C): - for k in range(H): - for l in range(W): - f.write(weights[i][k][l][j]) - - f.close() - - -def dumpConvWeights(file_name, weights, N, C, H, W): - - print (weights.shape) - - f = open(file_name, "wb") - for i in range(N): - for j in range(C): - for k in range(H): - for l in range(W): - f.write(weights[k][l][j][i]) - f.close() - - - -def dumpFcWeights(file_name, weights, H, W): - - print (weights.shape) - - f = open(file_name, "wb") - for i in range(H): - for j in range(W): - f.write(weights[i][j]) - f.close() - - -def dumpFcBias(file_name, bias, W): - - print (bias.shape) - - f = open(file_name, "wb") - for i in range(W): - f.write(bias[i]) - f.close() - - -def dumpLabels(file_name, Y_test): - - f = open(file_name, "wb") - - labels_map = {} - for label in Y_test: - label_val = np.int8(label[0]) - if label_val not in labels_map: - labels_map[label_val] = 0 - labels_map[label_val] += 1 - - f.write(label_val) - - f.close() - - - -def dumpData(X_test, file_name, N, C, H, W): - - print (X_test.shape) - - f = open(file_name, "wb") - for i in range(N): - for j in range(C): - for k in range(H): - for l in range(W): - val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l])) - f.write(np.float32(val[0])) - - f.close() - - - - - -def lr_schedule(epoch): - lrate = 0.001 - if epoch > 75: - lrate = 0.0005 - if epoch > 100: - lrate = 0.0003 - return lrate - - -def lr_schedule2(epoch): - lrate = 0.0005 - if epoch > 100: - lrate = 0.0003 - if epoch > 200: - lrate = 0.0002 - if epoch > 250: - lrate = 0.0001 - if epoch > 300: - lrate = 0.00003 - - return lrate - - -K.set_image_data_format('channels_first') - -(x_train, y_train), (x_test, y_test) = cifar10.load_data() -test_labels = y_test -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') - -#z-score -mean = np.mean(x_train,axis=(0,1,2,3)) -std = np.std(x_train,axis=(0,1,2,3)) -x_train = (x_train-mean)/(std+1e-7) -x_test = (x_test-mean)/(std+1e-7) - - -# Dumping test data and test labels -dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/" - -dumpLabels(dir_prefix + "test_labels.bin", y_test) -dumpData(x_test, dir_prefix + "norm_cifar_input.bin", 10000, 3, 32, 32) - - - -num_classes = 10 -y_train = np_utils.to_categorical(y_train,num_classes) -y_test = np_utils.to_categorical(y_test,num_classes) - -weight_decay = 1e-4 -activation_type = 'tanh' - - -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - - -model = Sequential() -model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=x_train.shape[1:])) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) -model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2,2))) -model.add(Dropout(0.2)) - -model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) -model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) 
-model.add(MaxPooling2D(pool_size=(2,2))) -model.add(Dropout(0.3)) - -model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) -model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) -model.add(Activation(activation_type)) -#model.add(BatchNormalization()) -model.add(MaxPooling2D(pool_size=(2,2))) -model.add(Dropout(0.4)) - -model.add(Flatten()) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) -model.summary() - -#data augmentation -datagen = ImageDataGenerator( - rotation_range=15, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, - ) - -datagen.fit(x_train) - - -#training -batch_size = 64 - -opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) -model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy']) -model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\ - steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\ - epochs=1, - verbose=1,validation_data=(x_test,y_test),callbacks=[LearningRateScheduler(lr_schedule2)]) -#save to disk -model_json = model.to_json() -with open('model.json', 'w') as json_file: - json_file.write(model_json) - model.save_weights('model.h5') - -#testing -scores = model.evaluate(x_test, y_test, batch_size=128, verbose=1) -print('\nTest result: %.3f loss: %.3f' % (scores[1]*100,scores[0])) - - -translate_to_approxhpvm(model, "alexnet2_cifar10_test/", x_test, test_labels, "alexnet2_cifar10/", y_test) -sys.exit(0) - - - -params = model.get_weights() -dumpConvWeights(dir_prefix + "conv1.bin", params[0], 32, 3, 3, 3) -dumpFcBias(dir_prefix + "conv1_bias.bin", params[1], 32) -dumpConvWeights(dir_prefix + "conv2.bin", params[2], 32, 32, 3, 3) -dumpFcBias(dir_prefix + "conv2_bias.bin", params[3], 32) -dumpConvWeights(dir_prefix + "conv3.bin", params[4], 64, 32, 3, 3) -dumpFcBias(dir_prefix + "conv3_bias.bin", params[5], 64) -dumpConvWeights(dir_prefix + "conv4.bin", params[6], 64, 64, 3, 3) -dumpFcBias(dir_prefix + "conv4_bias.bin", params[7], 64) -dumpConvWeights(dir_prefix + "conv5.bin", params[8], 128, 64, 3, 3) -dumpFcBias(dir_prefix + "conv5_bias.bin", params[9], 128) -dumpConvWeights(dir_prefix + "conv6.bin", params[10], 128, 128, 3, 3) -dumpFcBias(dir_prefix + "conv6_bias.bin", params[11], 128) - -dumpFcWeights(dir_prefix + "fc1.bin", params[12], 2048, 10) -dumpFcBias(dir_prefix + "fc1_bias.bin", params[13], 10) - - diff --git a/hpvm/projects/onnx/src/keras_models/lenet.py b/hpvm/projects/onnx/src/keras_models/lenet.py deleted file mode 100644 index c9588eef6c393457617b7fdda03c7b8222af5357..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/lenet.py +++ /dev/null @@ -1,97 +0,0 @@ - -import sys -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten, Activation -from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D -from keras import backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - -batch_size = 128 -num_classes = 10 - - -# input image dimensions -img_rows, img_cols = 28, 28 - - -if __name__ == "__main__": - - # Changing Keras data format to NCHW - NHWC is default - # NOTE: ApproxHPVM requires NCHW format - K.set_image_data_format('channels_first') - - # Loads Mnist dataset - (x_train, y_train), (x_test, y_test) = mnist.load_data() - test_labels = y_test - - # 
Reshaping data to be NCHW format - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - - # Data Normalization - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - - - # convert class vectors to binary class matrices - required by Keras - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - - - - # Network Compostion: 3 Conv Layers, 2 Dense Layers - model = Sequential() - - # ConvLayer1 - model.add(Conv2D(32, kernel_size=(5, 5), - activation='relu', - padding = 'same', - input_shape=input_shape)) - model.add(MaxPooling2D(pool_size=(2, 2))) - - # ConvLayer2 - model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same')) - - # ConvLayer3 - # NOTE: ZeroPading needed for ConvLayer with strides > 1 - model.add(ZeroPadding2D(padding = (1,1))) - model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') ) - - model.add(Flatten()) - # DenseLayer1 - model.add(Dense(1024, activation='relu')) - # DenseLayer2 - model.add(Dense(num_classes, activation='relu')) - # Softmax Layer - model.add(Activation('softmax')) - - - # Configures model for training - model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adadelta(), - metrics=['accuracy']) - - # Training - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=5, - verbose=1, - validation_data=(x_test, y_test)) - - - # Inference - score = model.evaluate(x_test, y_test, verbose=0) - print('Test loss:', score[0]) - print('Test accuracy:', score[1]) - - - # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src - translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10) - diff --git a/hpvm/projects/onnx/src/keras_models/mnist.ipynb b/hpvm/projects/onnx/src/keras_models/mnist.ipynb deleted file mode 100644 index 1964ebe3991a1eb78760ea6a1739eacfec47e666..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/mnist.ipynb +++ /dev/null @@ -1,105 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import sys\n", - "import numpy as np\n", - "import onnx\n", - "import glob\n", - "from onnxruntime.backend.backend import OnnxRuntimeBackend as backend\n", - "\n", - "from onnx import numpy_helper\n", - "\n", - "# onnx2hpvm modules\n", - "from onnx2hpvm.onnx_translator import from_onnx_to_hpvm" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "model = onnx.load('../models/mnist/mnist.onnx')\n", - "test_data_dir = '../models/mnist/test_data_set_0'" - ] - }, - { - "cell_type": "code", - "execution_count": 8, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "1\n" - ] - } - ], - "source": [ - "# Load inputs\n", - "inputs = []\n", - "inputs_num = len(glob.glob(os.path.join(test_data_dir, 'input_*.pb')))\n", - "print(inputs_num)\n", - "for i in range(inputs_num):\n", - " input_file = os.path.join(test_data_dir, 'input_{}.pb'.format(i))\n", - " tensor = onnx.TensorProto()\n", - " with open(input_file, 'rb') as f:\n", - " tensor.ParseFromString(f.read())\n", - " inputs.append(numpy_helper.to_array(tensor))\n", - "\n", - "# Load reference outputs\n", - "ref_outputs = []\n", - "ref_outputs_num = 
len(glob.glob(os.path.join(test_data_dir, 'output_*.pb')))\n", - "for i in range(ref_outputs_num):\n", - " output_file = os.path.join(test_data_dir, 'output_{}.pb'.format(i))\n", - " tensor = onnx.TensorProto()\n", - " with open(output_file, 'rb') as f:\n", - " tensor.ParseFromString(f.read())\n", - " ref_outputs.append(numpy_helper.to_array(tensor))\n", - "\n", - "# Run the model on the backend\n", - "outputs = list(backend.run_model(model, inputs))\n", - "\n", - "#from_onnx_to_hpvm(model)\n", - "# Compare the results with reference outputs.\n", - "#for ref_o, o in zip(ref_outputs, outputs):\n", - "# np.testing.assert_almost_equal(ref_o, o)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.7.3" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py deleted file mode 100644 index b739ed819634f30f4b33173443ac41f848f9c8f1..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/mobilenet_cifar10.py +++ /dev/null @@ -1,161 +0,0 @@ - -import sys -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -from keras.models import Sequential -from keras.layers import * -from keras.datasets import cifar10 -from keras.utils import to_categorical -from keras.callbacks import * -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - -K.set_image_data_format('channels_first') - -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test - -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) - - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - - -mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) -std = np.std(X_train, axis=(0, 2, 3), keepdims=True) - -X_train = (X_train - mean) / (std + 1e-9) -X_test = (X_test - mean) / (std + 1e-9) - -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) - - -def get_mobilenet(alpha=1, depth_multiplier=1): - model = Sequential() - - def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): - channel_axis = 1 - - model.add(Conv2D(filters, kernel, - padding='same', - use_bias=False, - strides=strides, - input_shape=(3, 32, 32))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): - channel_axis = 1 - - model.add(ZeroPadding2D(padding = ((1,1), (1,1) ))) - - model.add(DepthwiseConv2D((3, 3), - padding='valid', - #depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False)) - model.add(BatchNormalization(axis=channel_axis)) - - model.add(Activation('relu')) - model.add(Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - - _conv_block(32, alpha, 
strides=(1, 1)) - - _depthwise_conv_block(64, alpha, depth_multiplier) - - _depthwise_conv_block(128, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(128, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(256, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(256, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(512, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(1024, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(1024, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - model.add(AveragePooling2D(pool_size=2)) - model.add(Flatten()) - model.add(Dense(10, activation='softmax')) - - return model - - -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.0, - height_shift_range=0.0, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenet() - -learning_rates=[] -for i in range(5): - learning_rates.append(2e-2) -for i in range(50-5): - learning_rates.append(1e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(300-200): - learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - #epochs=300, - epochs=50, - callbacks=callbacks -) - -model.summary() - -translate_to_approxhpvm(model, "data/mobilenet_hpvm/", X_test, test_labels, 10) - diff --git a/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py b/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py deleted file mode 100644 index 64df7f98174f22a59f3382ed4337d23e29900051..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/mobilenet_shallow.py +++ /dev/null @@ -1,158 +0,0 @@ -import sys -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from keras.models import Sequential -from keras.layers import * -from keras.datasets import cifar10 -from keras.utils import to_categorical -from keras.callbacks import * -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - -K.set_image_data_format('channels_first') - -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test - -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) - - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - - -mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) -std = np.std(X_train, axis=(0, 2, 3), keepdims=True) - -X_train = (X_train - mean) / (std + 
1e-9) -X_test = (X_test - mean) / (std + 1e-9) - -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) - - -def get_mobilenet(alpha=1, depth_multiplier=1): - model = Sequential() - - def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): - channel_axis = 1 - filters = int(filters * alpha) - model.add(Conv2D(filters, kernel, - padding='same', - use_bias=False, - strides=strides, - input_shape=(3, 32, 32))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): - channel_axis = 1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - model.add(DepthwiseConv2D((3, 3), - padding='same', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False)) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - model.add(Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - - _conv_block(32, alpha, strides=(1, 1)) - - _depthwise_conv_block(64, alpha, depth_multiplier) - - _depthwise_conv_block(128, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(128, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(256, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(256, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(512, alpha, depth_multiplier, - strides=(2, 2)) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - -# _depthwise_conv_block(1024, alpha, depth_multiplier, -# strides=(2, 2)) -# _depthwise_conv_block(1024, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - - model.add(AveragePooling2D(pool_size=2)) - model.add(Flatten()) - model.add(Dense(10, activation='softmax')) - - return model - - -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.2, - height_shift_range=0.2, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenet() - -learning_rates=[] -for i in range(5): - learning_rates.append(5e-2) -for i in range(50-5): - learning_rates.append(2e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(250-200): - learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - #epochs=300, - epochs=250, - callbacks=callbacks -) - -model.summary() - -translate_to_approxhpvm(model, "data/mobilenet_shallow/", X_test, test_labels, 10) - diff --git 
a/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py b/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py deleted file mode 100644 index 2fbed4623d0e57d7a0dd948fa0894127fea72324..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/mobilenetv2_cifar10.py +++ /dev/null @@ -1,176 +0,0 @@ -import sys -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -from keras.models import Sequential -from keras.layers import * -from keras.datasets import cifar10 -from keras.utils import to_categorical -from keras.callbacks import * -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K - - -K.set_image_data_format('channels_first') - -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test - -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) - - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - -mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True) -std = np.std(X_train, axis=(0, 1, 2), keepdims=True) -X_train = (X_train - mean) / (std + 1e-9) -X_test = (X_test - mean) / (std + 1e-9) - -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - -def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): - channel_axis = 1 - - in_channels = inputs.shape[1] - pointwise_conv_filters = int(filters * alpha) - pointwise_filters = _make_divisible(pointwise_conv_filters, 8) - x = inputs - - if block_id: - x = Conv2D(int(expansion * in_channels), kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - if stride == 2: - x = ZeroPadding2D(padding=(1, 1))(x) - else: - x = ZeroPadding2D(padding=(1, 1))(x) - - x = DepthwiseConv2D(kernel_size=3, strides=stride, use_bias=False, padding='valid')(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - - - if in_channels == pointwise_filters and stride == 1: - return Add()([inputs, x]) - return x - -def get_mobilenetv2(alpha=1.0, depth_multiplier=1): - - channel_axis = 1 - - first_block_filters = _make_divisible(32 * alpha, 8) - img_input = Input(shape=(3, 32, 32)) - - x = ZeroPadding2D(padding=(1, 1))(img_input) - x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='valid', use_bias=False)(x) - #x = BatchNormalization(axis=channel_axis)(x) - #x = Activation('relu')(x) - - x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0 ) - - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=1 ) - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2 ) - - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3 ) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=4 ) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5 ) - - x = _inverted_res_block(x, filters=64, 
alpha=alpha, stride=2, expansion=6, block_id=6 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9 ) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) - x = Dropout(rate=0.25)(x) - - if alpha > 1.0: - last_block_filters = _make_divisible(1280 * alpha, 8) - else: - last_block_filters = 1280 - - x = Conv2D(last_block_filters, kernel_size=1, use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - x = AveragePooling2D()(x) - x = Flatten()(x) - x = Dense(10, activation='softmax')(x) - - model = Model(inputs=img_input, outputs=x) - return model - - -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.0, - height_shift_range=0.0, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenetv2() - -learning_rates=[] -for i in range(5): - learning_rates.append(2e-2) -for i in range(50-5): - learning_rates.append(1e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(300-200): - learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - epochs=300, - callbacks=callbacks -) - diff --git a/hpvm/projects/onnx/src/keras_models/resnet.py b/hpvm/projects/onnx/src/keras_models/resnet.py deleted file mode 100644 index 3a9e092170c767d433e438b16fdc38bd9bb9d966..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/resnet.py +++ /dev/null @@ -1,576 +0,0 @@ -""" -#Trains a ResNet on the CIFAR10 dataset. 
- -ResNet v1: -[Deep Residual Learning for Image Recognition -](https://arxiv.org/pdf/1512.03385.pdf) - -ResNet v2: -[Identity Mappings in Deep Residual Networks -](https://arxiv.org/pdf/1603.05027.pdf) - - -Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti -:------------|--:|-------:|-----------------------:|---: -ResNet20 v1| 3| 92.16 %| 91.25 %|35 -ResNet32 v1| 5| 92.46 %| 92.49 %|50 -ResNet44 v1| 7| 92.50 %| 92.83 %|70 -ResNet56 v1| 9| 92.71 %| 93.03 %|90 -ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 -ResNet164 v1| 27| - %| 94.07 %| - -ResNet1001 v1|N/A| - %| 92.39 %| - - - - -Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti -:------------|--:|-------:|-----------------------:|---: -ResNet20 v2| 2| - %| - %|--- -ResNet32 v2|N/A| NA %| NA %| NA -ResNet44 v2|N/A| NA %| NA %| NA -ResNet56 v2| 6| 93.01 %| NA %|100 -ResNet110 v2| 12| 93.15 %| 93.63 %|180 -ResNet164 v2| 18| - %| 94.54 %| - -ResNet1001 v2|111| - %| 95.08+-.14 %| - -""" - -from __future__ import print_function -import keras -from keras.layers import Dense, Conv2D, BatchNormalization, Activation -from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D -from keras.optimizers import Adam -from keras.callbacks import ModelCheckpoint, LearningRateScheduler -from keras.callbacks import ReduceLROnPlateau -from keras.preprocessing.image import ImageDataGenerator -from keras.regularizers import l2 -from keras import backend as K -from keras.models import Model -from keras.datasets import cifar10 -from keras import backend as K -import numpy as np -import os -import sys -#from approxhpvm_translator import translate_to_approxhpvm -#from weight_utils import dumpCalibrationData - - - -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - - -K.set_image_data_format('channels_first') - - -# Training parameters -batch_size = 32 # orig paper trained all networks with batch_size=128 -#---- epochs = 200 -epochs = 2 -data_augmentation = True -num_classes = 10 - -# Subtracting pixel mean improves accuracy -subtract_pixel_mean = True - -# Model parameter -# ---------------------------------------------------------------------------- -# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch -# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti -# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) -# ---------------------------------------------------------------------------- -# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) -# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) -# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) -# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) -# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) -# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) -# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) -# --------------------------------------------------------------------------- -n = 3 - -# Model version -# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) -version = 1 - -# Computed depth from supplied model parameter n -if version == 1: - depth = n * 6 + 2 -elif version == 2: - depth = n * 9 + 2 - -# Model name, depth and version -model_type = 'ResNet%dv%d' % (depth, version) - -# Load the CIFAR10 data. -(x_train, y_train), (x_test, y_test) = cifar10.load_data() -test_labels = y_test -train_labels = y_train - -# Input image dimensions. -input_shape = x_train.shape[1:] - -# Normalize data. 
-x_train = x_train.astype('float32') / 255 -x_test = x_test.astype('float32') / 255 - -# If subtract pixel mean is enabled -if subtract_pixel_mean: - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') -print('y_train shape:', y_train.shape) - -# Convert class vectors to binary class matrices. -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - - - - - - -def lr_schedule(epoch): - """Learning Rate Schedule - - Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. - Called automatically every epoch as part of callbacks during training. - - # Arguments - epoch (int): The number of epochs - - # Returns - lr (float32): learning rate - """ - lr = 1e-3 - if epoch > 180: - lr *= 0.5e-3 - elif epoch > 160: - lr *= 1e-3 - elif epoch > 120: - lr *= 1e-2 - elif epoch > 80: - lr *= 1e-1 - print('Learning rate: ', lr) - return lr - - -def resnet_layer(inputs, - num_filters=16, - kernel_size=3, - strides=1, - activation='relu', - batch_normalization=True, - conv_first=True): - """2D Convolution-Batch Normalization-Activation stack builder - - # Arguments - inputs (tensor): input tensor from input image or previous layer - num_filters (int): Conv2D number of filters - kernel_size (int): Conv2D square kernel dimensions - strides (int): Conv2D square stride dimensions - activation (string): activation name - batch_normalization (bool): whether to include batch normalization - conv_first (bool): conv-bn-activation (True) or - bn-activation-conv (False) - - # Returns - x (tensor): tensor as input to the next layer - """ - conv = Conv2D(num_filters, - kernel_size=kernel_size, - strides=strides, - padding='valid', # NOTE: using valid convs with explicit pad operation - kernel_initializer='he_normal', - kernel_regularizer=l2(1e-4)) - - padding_value = int((kernel_size - 1) / 2) - zero_padding = ZeroPadding2D(padding = (padding_value, padding_value)) - - # FIXME: Temporarily disabled batch normalization - batch_normalization = False - - x = inputs - x = zero_padding(x) - if conv_first: - x = conv(x) - if batch_normalization: - x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - else: - if batch_normalization: - x = BatchNormalization()(x) - if activation is not None: - x = Activation(activation)(x) - x = conv(x) - return x - - - -def resnet_v0(input_shape, depth, num_classes=10): - """ResNet Version 1 Model builder [a] - - Stacks of 2 x (3 x 3) Conv2D-BN-ReLU - Last ReLU is after the shortcut connection. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filters is - doubled. Within each stage, the layers have the same number filters and the - same number of filters. 
- Features maps sizes: - stage 0: 32x32, 16 - stage 1: 16x16, 32 - stage 2: 8x8, 64 - The Number of parameters is approx the same as Table 6 of [a]: - ResNet20 0.27M - ResNet32 0.46M - ResNet44 0.66M - ResNet56 0.85M - ResNet110 1.7M - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 6 != 0: - raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') - # Start model definition. - num_filters = 16 - num_res_blocks = int((depth - 2) / 6) - - inputs = Input(shape=input_shape) - x = resnet_layer(inputs=inputs) - # Instantiate the stack of residual units - for stack in range(3): - for res_block in range(num_res_blocks): - strides = 1 - if stack > 0 and res_block == 0: # first layer but not first stack - strides = 2 # downsample - y = resnet_layer(inputs=x, - num_filters=num_filters, - strides=strides) - y = resnet_layer(inputs=y, - num_filters=num_filters, - activation=None) - if stack > 0 and res_block == 0: # first layer but not first stack - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - x = Activation('relu')(x) - num_filters *= 1 - - # Add classifier on top. - # v1 does not use BN after last shortcut connection-ReLU - #-- x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - x = Dense(64)(y) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(x) - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - - -def resnet_v1_1(input_shape, depth, num_classes=10): - """ResNet Version 1 Model builder [a] - - Stacks of 2 x (3 x 3) Conv2D-BN-ReLU - Last ReLU is after the shortcut connection. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filters is - doubled. Within each stage, the layers have the same number filters and the - same number of filters. - Features maps sizes: - stage 0: 32x32, 16 - stage 1: 16x16, 32 - stage 2: 8x8, 64 - The Number of parameters is approx the same as Table 6 of [a]: - ResNet20 0.27M - ResNet32 0.46M - ResNet44 0.66M - ResNet56 0.85M - ResNet110 1.7M - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 6 != 0: - raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') - # Start model definition. 
- num_filters = 16 - num_res_blocks = int((depth - 2) / 6) - - inputs = Input(shape=input_shape) - x = resnet_layer(inputs=inputs) - # Instantiate the stack of residual units - for stack in range(3): - for res_block in range(num_res_blocks): - strides = 1 - if stack > 0 and res_block == 0: # first layer but not first stack - strides = 2 # downsample - y = resnet_layer(inputs=x, - num_filters=num_filters, - strides=strides) - y = resnet_layer(inputs=y, - num_filters=num_filters, - activation=None) - if stack > 0 and res_block == 0: # first layer but not first stack - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - x = Activation('relu')(x) - num_filters *= 2 - - - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - #activation='softmax', - kernel_initializer='he_normal')(y) - - outputs = Activation('softmax')(outputs) - - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - - - -def resnet_v2(input_shape, depth, num_classes=10): - """ResNet Version 2 Model builder [b] - - Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as - bottleneck layer - First shortcut connection per layer is 1 x 1 Conv2D. - Second and onwards shortcut connection is identity. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filter maps is - doubled. Within each stage, the layers have the same number filters and the - same filter map sizes. - Features maps sizes: - conv1 : 32x32, 16 - stage 0: 32x32, 64 - stage 1: 16x16, 128 - stage 2: 8x8, 256 - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') - # Start model definition. 
- num_filters_in = 16 - num_res_blocks = int((depth - 2) / 9) - - inputs = Input(shape=input_shape) - # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) - - # Instantiate the stack of residual units - for stage in range(3): - for res_block in range(num_res_blocks): - activation = 'relu' - batch_normalization = True - strides = 1 - if stage == 0: - num_filters_out = num_filters_in * 4 - if res_block == 0: # first layer and first stage - activation = None - batch_normalization = False - else: - num_filters_out = num_filters_in * 2 - if res_block == 0: # first layer but not first stage - strides = 2 # downsample - - # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) - if res_block == 0: - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - - num_filters_in = num_filters_out - - # Add classifier on top. - # v2 has BN-ReLU before Pooling - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - -depth = 20 - -if version == 2: - model = resnet_v2(input_shape=input_shape, depth=depth) -else: - model = resnet_v1_1(input_shape=input_shape, depth=depth) - - -model.compile(loss='categorical_crossentropy', - optimizer=Adam(lr=lr_schedule(0)), - metrics=['accuracy']) -model.summary() -print(model_type) - -# Prepare model model saving directory. -save_dir = os.path.join(os.getcwd(), 'saved_models') -model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type -if not os.path.isdir(save_dir): - os.makedirs(save_dir) -filepath = os.path.join(save_dir, model_name) - -# Prepare callbacks for model saving and for learning rate adjustment. -checkpoint = ModelCheckpoint(filepath=filepath, - monitor='val_acc', - verbose=1, - save_best_only=True) - -lr_scheduler = LearningRateScheduler(lr_schedule) - -lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) - -callbacks = [checkpoint, lr_reducer, lr_scheduler] - -# Run training, with or without data augmentation. 
-if not data_augmentation: - print('Not using data augmentation.') - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - # set input mean to 0 over the dataset - featurewise_center=False, - # set each sample mean to 0 - samplewise_center=False, - # divide inputs by std of dataset - featurewise_std_normalization=False, - # divide each input by its std - samplewise_std_normalization=False, - # apply ZCA whitening - zca_whitening=False, - # epsilon for ZCA whitening - zca_epsilon=1e-06, - # randomly rotate images in the range (deg 0 to 180) - rotation_range=0, - # randomly shift images horizontally - width_shift_range=0.1, - # randomly shift images vertically - height_shift_range=0.1, - # set range for random shear - shear_range=0., - # set range for random zoom - zoom_range=0., - # set range for random channel shifts - channel_shift_range=0., - # set mode for filling points outside the input boundaries - fill_mode='nearest', - # value used for fill_mode = "constant" - cval=0., - # randomly flip images - horizontal_flip=True, - # randomly flip images - vertical_flip=False, - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format="channels_first", - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), - validation_data=(x_test, y_test), - epochs=epochs, verbose=1, workers=4, - callbacks=callbacks) - -# Score trained model. 
-scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) - - -#dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train, -# "calibration_data/resnet18_train_labels.bin", train_labels) -#sys.exit(0) - -#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels) - -#translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test) - -import keras2onnx -onnx_model = keras2onnx.convert_keras(model, model.name) -import onnx -onnx.save(onnx_model, "../models/keras/resnet.onnx") diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py deleted file mode 100644 index 64e5d36e78fd728381dc864bf965ff68c4e7cf16..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/vgg16_cifar10.py +++ /dev/null @@ -1,241 +0,0 @@ - - -from __future__ import print_function -import keras -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D, BatchNormalization -from keras import optimizers -import numpy as np -from keras.layers.core import Lambda -from keras import backend as K -from keras import regularizers -import os -import sys -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData - - - -class cifar10vgg: - def __init__(self,train=True): - self.num_classes = 10 - self.weight_decay = 0.0005 - self.x_shape = [3,32,32] - - self.model = self.build_model() - if train: - self.model = self.train(self.model) - else: - self.model.load_weights('cifar10vgg.h5') - - - def build_model(self): - # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper. 
- - model = Sequential() - weight_decay = self.weight_decay - - model.add(Conv2D(64, (3, 3), padding='same', - input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.3)) - - model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Dropout(0.5)) - - model.add(Flatten()) - model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - # model.add(BatchNormalization()) - - model.add(Dropout(0.5)) - model.add(Dense(self.num_classes)) - model.add(Activation('softmax')) - return model - - - def normalize(self,X_train,X_test): - #this function normalize inputs for zero mean and unit variance - # it is used when training a model. - # Input: training set and test set - # Output: normalized training set and test set according to the trianing set statistics. 
- mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train, axis=(0, 1, 2, 3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - return X_train, X_test - - - def normalize_production(self,x): - #this function is used to normalize instances in production according to saved training set statistics - # Input: X - a training set - # Output X - a normalized training set according to normalization constants. - - #these values produced during first training and are general for the standard cifar10 training set normalization - mean = 120.707 - std = 64.15 - return (x-mean)/(std+1e-7) - - - def predict(self,x,normalize=True,batch_size=50): - if normalize: - x = self.normalize_production(x) - return self.model.predict(x,batch_size) - - - def train(self,model): - - #training parameters - batch_size = 128 - #maxepoches = 250 - #maxepoches = 250 - maxepoches = 30 - learning_rate = 0.01 - lr_decay = 1e-6 - lr_drop = 20 - # The data, shuffled and split between train and test sets: - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train, x_test = self.normalize(x_train, x_test) - - y_train = keras.utils.to_categorical(y_train, self.num_classes) - y_test = keras.utils.to_categorical(y_test, self.num_classes) - - def lr_scheduler(epoch): - return learning_rate * (0.5 ** (epoch // lr_drop)) - reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler) - - #data augmentation - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180) - width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) - height_shift_range=0.1, # randomly shift images vertically (fraction of total height) - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - - - #optimization details - sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True) - model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy']) - - - # training process in a for loop with learning rate drop every 25 epoches. 
- - historytemp = model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - steps_per_epoch=x_train.shape[0] // batch_size, - epochs=maxepoches, - validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2) - - model.save_weights('cifar10vgg.h5') - return model - - - -if __name__ == '__main__': - - K.set_image_data_format('channels_first') - - os.environ["CUDA_VISIBLE_DEVICES"] = "1" - - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - test_labels = y_test - train_labels = y_train - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - - y_train = keras.utils.to_categorical(y_train, 10) - y_test = keras.utils.to_categorical(y_test, 10) - - model = cifar10vgg() - - predicted_x = model.predict(x_test) - - norm_test = model.normalize_production(x_test) - - # Normalizing train data before dumping - #x_train, x_test = model.normalize(x_train, x_test) - x_train = model.normalize_production(x_train) - - # dumpCalibrationData("vgg16_cifar_calib.bin", x_train, "vgg16_train_labels.bin", train_labels) - - translate_to_approxhpvm(model.model, "data/vgg16_cifar10/", norm_test, test_labels, 10) - - residuals = np.argmax(predicted_x,1)!=np.argmax(y_test,1) - - loss = sum(residuals)/len(residuals) - print("the validation 0/1 loss is: ",loss) - - - diff --git a/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py b/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py deleted file mode 100644 index 66fe6be669f984b92c8c602332c29e09968e9c8a..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/keras_models/vgg16_cifar100.py +++ /dev/null @@ -1,243 +0,0 @@ - -from __future__ import print_function -import os -import keras -from keras.datasets import cifar100 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras import optimizers -import numpy as np -from keras.layers.core import Lambda -from keras import backend as K -from keras import regularizers -from approxhpvm_translator import translate_to_approxhpvm -import sys -from weight_utils import dumpCalibrationData - - - -class cifar100vgg: - def __init__(self,train=True): - self.num_classes = 100 - self.weight_decay = 0.0005 - self.x_shape = [3,32,32] - - self.model = self.build_model() - if train: - self.model = self.train(self.model) - else: - self.model.load_weights('cifar100vgg.h5') - - - def build_model(self): - # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper. 
- - model = Sequential() - weight_decay = self.weight_decay - - model.add(Conv2D(64, (3, 3), padding='same', - input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.3)) - - model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(MaxPooling2D(pool_size=(2, 2))) - - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - model.add(Dropout(0.4)) - - model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Dropout(0.5)) - - model.add(Flatten()) - model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation('relu')) - #model.add(BatchNormalization()) - - model.add(Dropout(0.5)) - model.add(Dense(self.num_classes)) - model.add(Activation('softmax')) - return model - - - def normalize(self,X_train,X_test): - #this function normalize inputs for zero mean and unit variance - # it is used when training a model. - # Input: training set and test set - # Output: normalized training set and test set according to the trianing set statistics. 
- mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train, axis=(0, 1, 2, 3)) - print(mean) - print(std) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - return X_train, X_test - - def normalize_production(self,x): - #this function is used to normalize instances in production according to saved training set statistics - # Input: X - a training set - # Output X - a normalized training set according to normalization constants. - - #these values produced during first training and are general for the standard cifar10 training set normalization - mean = 121.936 - std = 68.389 - return (x-mean)/(std+1e-7) - - def predict(self,x,normalize=True,batch_size=50): - if normalize: - x = self.normalize_production(x) - return self.model.predict(x,batch_size) - - def train(self,model): - - #training parameters - batch_size = 128 - #maxepoches = 250 - #maxepoches = 400 - maxepoches = 4 - learning_rate = 0.05 - lr_decay = 1e-6 - lr_drop = 20 - - # The data, shuffled and split between train and test sets: - (x_train, y_train), (x_test, y_test) = cifar100.load_data() - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train, x_test = self.normalize(x_train, x_test) - - y_train = keras.utils.to_categorical(y_train, self.num_classes) - y_test = keras.utils.to_categorical(y_test, self.num_classes) - - - def lr_scheduler(epoch): - return learning_rate * (0.5 ** (epoch // lr_drop)) - reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler) - - - #data augmentation - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180) - width_shift_range=0.1, # randomly shift images horizontally (fraction of total width) - height_shift_range=0.1, # randomly shift images vertically (fraction of total height) - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - - - #optimization details - sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True) - model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy']) - - - # training process in a for loop with learning rate drop every 25 epoches. 
- - historytemp = model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - steps_per_epoch=x_train.shape[0] // batch_size, - epochs=maxepoches, - validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2) - model.save_weights('cifar100vgg.h5') - return model - -if __name__ == '__main__': - - K.set_image_data_format('channels_first') - os.environ["CUDA_VISIBLE_DEVICES"] = "1" - - (x_train, y_train), (x_test, y_test) = cifar100.load_data() - test_labels = y_test - train_labels = y_train - - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - - y_train = keras.utils.to_categorical(y_train, 100) - y_test = keras.utils.to_categorical(y_test, 100) - - model = cifar100vgg() - - predicted_x = model.predict(x_test) - - norm_test = model.normalize_production(x_test) - - x_train = model.normalize_production(x_train) - - dumpCalibrationData("calibration_data/vgg16_cifar100_calib.bin", x_train, - "calibration_data/vgg16_cifar100_train_labels.bin", train_labels) - sys.exit(0) - - - translate_to_approxhpvm(model.model, "vgg16_cifar100_test/", norm_test, test_labels, - "vgg16_cifar100_front", y_test) - - - residuals = (np.argmax(predicted_x,1)!=np.argmax(y_test,1)) - loss = sum(residuals)/len(residuals) - print("the validation 0/1 loss is: ",loss) - - diff --git a/hpvm/projects/onnx/src/lenet_keras.ipynb b/hpvm/projects/onnx/src/lenet_keras.ipynb deleted file mode 100644 index bd83db10050f74988dc313991d06b99e247cf011..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/lenet_keras.ipynb +++ /dev/null @@ -1,199 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import sys\n", - "import keras\n", - "from keras.datasets import mnist\n", - "from keras.models import Sequential\n", - "from keras.layers import Dense, Dropout, Flatten, Activation\n", - "from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D\n", - "from keras import backend as K" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 128\n", - "num_classes = 10\n", - "# input image dimensions\n", - "img_rows, img_cols = 28, 28 " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Changing Keras data format to NCHW - NHWC is default\n", - "# NOTE: ApproxHPVM requires NCHW format\n", - "K.set_image_data_format('channels_first')\n", - "\n", - "# Loads Mnist dataset\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "test_labels = y_test\n", - "\n", - "# Reshaping data to be NCHW format \n", - "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n", - "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", - "input_shape = (1, img_rows, img_cols)\n", - "\n", - "\n", - "# Data Normalization \n", - "x_train = x_train.astype('float32')\n", - "x_test = x_test.astype('float32')\n", - "x_train /= 255\n", - "x_test /= 255\n", - "\n", - "\n", - "# convert class vectors to binary class matrices - required by Keras\n", - "y_train = keras.utils.to_categorical(y_train, num_classes)\n", - "y_test = keras.utils.to_categorical(y_test, num_classes)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Train on 60000 samples, validate 
on 10000 samples\n", - "Epoch 1/5\n", - "60000/60000 [==============================] - 8s 131us/step - loss: 0.2878 - acc: 0.9072 - val_loss: 0.0514 - val_acc: 0.9814\n", - "Epoch 2/5\n", - "60000/60000 [==============================] - 5s 83us/step - loss: 0.0459 - acc: 0.9854 - val_loss: 0.0337 - val_acc: 0.9888\n", - "Epoch 3/5\n", - "60000/60000 [==============================] - 8s 127us/step - loss: 0.0290 - acc: 0.9908 - val_loss: 0.0275 - val_acc: 0.9902\n", - "Epoch 4/5\n", - "60000/60000 [==============================] - 9s 151us/step - loss: 0.0195 - acc: 0.9940 - val_loss: 0.0367 - val_acc: 0.9883\n", - "Epoch 5/5\n", - "60000/60000 [==============================] - 10s 169us/step - loss: 0.0136 - acc: 0.9960 - val_loss: 0.0318 - val_acc: 0.9905\n", - "Test loss: 0.031750623502753844\n", - "Test accuracy: 0.9905\n" - ] - } - ], - "source": [ - "# Network Compostion: 3 Conv Layers, 2 Dense Layers\n", - "model = Sequential()\n", - "\n", - "# ConvLayer1\n", - "model.add(Conv2D(32, kernel_size=(5, 5),\n", - " activation='relu',\n", - " padding = 'same',\n", - " input_shape=input_shape))\n", - "model.add(MaxPooling2D(pool_size=(2, 2)))\n", - "\n", - "# ConvLayer2\n", - "model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same'))\n", - "\n", - "# ConvLayer3\n", - "# NOTE: ZeroPading needed for ConvLayer with strides > 1\n", - "model.add(ZeroPadding2D(padding = (1,1)))\n", - "model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') )\n", - "\n", - "model.add(Flatten())\n", - "# DenseLayer1\n", - "model.add(Dense(1024, activation='relu'))\n", - "# DenseLayer2\n", - "model.add(Dense(num_classes, activation='relu'))\n", - "# Softmax Layer\n", - "model.add(Activation('softmax'))\n", - "\n", - "\n", - "# Configures model for training \n", - "model.compile(loss=keras.losses.categorical_crossentropy,\n", - " optimizer=keras.optimizers.Adadelta(),\n", - " metrics=['accuracy'])\n", - "\n", - "# Training\n", - "model.fit(x_train, y_train,\n", - " batch_size=batch_size,\n", - " epochs=5,\n", - " verbose=1,\n", - " validation_data=(x_test, y_test))\n", - "\n", - "\n", - "# Inference\n", - "score = model.evaluate(x_test, y_test, verbose=0)\n", - "print('Test loss:', score[0])\n", - "print('Test accuracy:', score[1])" - ] - }, - { - "cell_type": "code", - "execution_count": 58, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 1.037928581237793\n" - ] - } - ], - "source": [ - "import time\n", - "start = time.time()\n", - "keras_result = model.predict(x_test[:8000])\n", - "print(\"time:\", time.time() - start)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import keras2onnx\n", - "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", - "import onnx\n", - "onnx.save(onnx_model, \"../models/keras/lenet.onnx\")\n", - "import pickle\n", - "with open('dumps/lenet_keras_dump', 'wb') as fp:\n", - " pickle.dump(keras_result, fp)" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/lenet_onnx.ipynb 
b/hpvm/projects/onnx/src/lenet_onnx.ipynb deleted file mode 100644 index fdf69ebc3cde8c1e12bee99bcde574786ffd0c18..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/lenet_onnx.ipynb +++ /dev/null @@ -1,191 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 37, - "metadata": {}, - "outputs": [], - "source": [ - "import sys\n", - "import keras\n", - "from keras.datasets import mnist\n", - "from keras import backend as K\n", - "import numpy as np\n", - "import os\n", - "import keras2onnx\n", - "import onnx\n", - "import onnxruntime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 38, - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 128\n", - "num_classes = 10\n", - "# input image dimensions\n", - "img_rows, img_cols = 28, 28 " - ] - }, - { - "cell_type": "code", - "execution_count": 39, - "metadata": {}, - "outputs": [], - "source": [ - "# Changing Keras data format to NCHW - NHWC is default\n", - "# NOTE: ApproxHPVM requires NCHW format\n", - "K.set_image_data_format('channels_first')\n", - "\n", - "# Loads Mnist dataset\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "test_labels = y_test\n", - "\n", - "# Reshaping data to be NCHW format \n", - "x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)\n", - "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", - "input_shape = (1, img_rows, img_cols)\n", - "\n", - "\n", - "# Data Normalization \n", - "x_train = x_train.astype('float32')\n", - "x_test = x_test.astype('float32')\n", - "x_train /= 255\n", - "x_test /= 255\n", - "\n", - "\n", - "# convert class vectors to binary class matrices - required by Keras\n", - "y_train = keras.utils.to_categorical(y_train, num_classes)\n", - "y_test = keras.utils.to_categorical(y_test, num_classes)" - ] - }, - { - "cell_type": "code", - "execution_count": 40, - "metadata": {}, - "outputs": [], - "source": [ - "sess = onnxruntime.InferenceSession(\"../models/keras/lenet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 41, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input name : conv2d_1_input\n", - "Input shape : [None, 1, 28, 28]\n", - "Input type : tensor(float)\n" - ] - } - ], - "source": [ - "input_name = sess.get_inputs()[0].name\n", - "print(\"Input name :\", input_name)\n", - "input_shape = sess.get_inputs()[0].shape\n", - "print(\"Input shape :\", input_shape)\n", - "input_type = sess.get_inputs()[0].type\n", - "print(\"Input type :\", input_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 42, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Output name : activation_1\n", - "Output shape : [None, 10]\n", - "Output type : tensor(float)\n" - ] - } - ], - "source": [ - "output_name = sess.get_outputs()[0].name\n", - "print(\"Output name :\", output_name) \n", - "output_shape = sess.get_outputs()[0].shape\n", - "print(\"Output shape :\", output_shape)\n", - "output_type = sess.get_outputs()[0].type\n", - "print(\"Output type :\", output_type)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "start = time.time()\n", - "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n", - "print(\"time:\", time.time() - start)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - 
"source": [ - "import pickle\n", - "with open('dumps/lenet_ort_dump', 'wb') as fp:\n", - " pickle.dump(ort_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/lenet_ort_dump', 'rb') as fp:\n", - " ort_res = pickle.load(fp)\n", - "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(ort_res[0], keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/lenet_tvm.ipynb b/hpvm/projects/onnx/src/lenet_tvm.ipynb deleted file mode 100644 index 274553dcc52d2b84f601a4480b04adc54a1ac0ea..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/lenet_tvm.ipynb +++ /dev/null @@ -1,241 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import mnist\n", - "from keras import backend as K\n", - "import keras\n", - "import sys\n", - "import struct\n", - "import numpy as np\n", - "import os\n", - "import tvm\n", - "import tvm.relay as relay\n", - "from tvm.contrib import graph_runtime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "batch_size = 128\n", - "num_classes = 10\n", - "# input image dimensions\n", - "img_rows, img_cols = 28, 28 " - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "# Changing Keras data format to NCHW - NHWC is default\n", - "# NOTE: ApproxHPVM requires NCHW format\n", - "K.set_image_data_format('channels_first')\n", - "\n", - "# Loads Mnist dataset\n", - "(x_train, y_train), (x_test, y_test) = mnist.load_data()\n", - "test_labels = y_test\n", - "x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)\n", - "input_shape = (1, img_rows, img_cols)\n", - "x_test = x_test.astype('float32')\n", - "x_test /= 255" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "import onnx\n", - "onnx_model = onnx.load(\"../models/keras/lenet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 14, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n" - ] - } - ], - "source": [ - "target='cuda -libs=cudnn,cublas'\n", - "input_name = 'conv2d_1_input'\n", - "shape_dict = {input_name: 
x_test.shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "ctx = tvm.gpu()\n", - "with relay.build_config(opt_level=3):\n", - " executor = relay.build_module.create_executor('graph', mod, ctx, target)" - ] - }, - { - "cell_type": "code", - "execution_count": 15, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (10000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (10000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n" - ] - } - ], - "source": [ - "tvm_out = executor.evaluate()(tvm.nd.array(x_test), **params)" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (8000, 3136, 'float32'), (1024, 3136, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 64, 14, 14, 'float32'), (64, 64, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 32, 14, 14, 'float32'), (64, 32, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (8000, 1, 28, 28, 'float32'), (32, 1, 5, 5, 'float32'), (1, 1), (2, 2), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" - ] - } - ], - "source": [ - "input_name = 'conv2d_1_input'\n", - "input_size = 8000\n", - "shape_dict = {input_name: x_test[:input_size].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "target = 'cuda -libs=cudnn,cublas'\n", - "with relay.build_config(opt_level=3):\n", - " graph, lib, params = relay.build(mod, target, params=params)" - ] - }, - { - "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time: 0.043964385986328125\n" - ] - } - ], - "source": [ - "import time\n", - "ctx = tvm.gpu()\n", - "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", - "# create module\n", - "module = graph_runtime.create(graph, lib, ctx)\n", - "# set input and parameters\n", - "module.set_input(\"conv2d_1_input\", x_test[:input_size])\n", - "module.set_input(**params)\n", - "# run\n", - "start_time = time.time()\n", - "module.run()\n", - "out_shape = (input_size, 10)\n", - "# get output\n", - "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", - "print(\"Time:\", time.time() - start_time)\n", - "#ftimer = module.module.time_evaluator(\"get_output\", ctx, number=1)\n", - "#prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond\n", - "#print(\"%-20s %-19s (%s)\" % (\"lenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "with open('dumps/lenet_tvm_dump', 'wb') as fp:\n", - " pickle.dump(out, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/lenet_tvm_dump', 'rb') as fp:\n", - " tvm_res = pickle.load(fp)\n", - "with open ('dumps/lenet_keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(tvm_res, keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/mobilenet_keras.ipynb b/hpvm/projects/onnx/src/mobilenet_keras.ipynb deleted file mode 100644 index 5175bc2aef1403fec6a70ffb0cdd504a062d5f96..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/mobilenet_keras.ipynb +++ /dev/null @@ -1,592 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import sys\n", - "import os\n", - "\n", - "from keras.models import Sequential\n", - "from keras.layers import *\n", - "from keras.datasets import cifar10\n", - "from keras.utils import to_categorical\n", - "from keras.callbacks import *\n", - "from 
keras.preprocessing.image import ImageDataGenerator\n", - "from keras.models import Model\n", - "from keras import optimizers\n", - "import keras.backend as K\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X_train.shape = (50000, 3, 32, 32)\n", - "X_test.shape = (10000, 3, 32, 32)\n" - ] - } - ], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", - "test_labels = y_test\n", - "\n", - "print (\"X_train.shape = \", X_train.shape)\n", - "print (\"X_test.shape = \", X_test.shape)\n", - "\n", - "\n", - "X_train = X_train.astype('float32')\n", - "X_test = X_test.astype('float32')\n", - "\n", - "\n", - "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", - "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", - "\n", - "X_train = (X_train - mean) / (std + 1e-9)\n", - "X_test = (X_test - mean) / (std + 1e-9)\n", - "\n", - "y_train = to_categorical(y_train, num_classes=10)\n", - "y_test = to_categorical(y_test, num_classes=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "def get_mobilenet(alpha=1, depth_multiplier=1):\n", - " model = Sequential()\n", - " \n", - " def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)):\n", - " channel_axis = 1\n", - " \n", - " model.add(Conv2D(filters, kernel,\n", - " padding='same',\n", - " use_bias=False,\n", - " strides=strides, \n", - " input_shape=(3, 32, 32)))\n", - " model.add(BatchNormalization(axis=channel_axis))\n", - " model.add(Activation('relu'))\n", - " \n", - " def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)):\n", - " channel_axis = 1 \n", - "\n", - " model.add(ZeroPadding2D(padding = ((1,1), (1,1) )))\n", - "\n", - " model.add(DepthwiseConv2D((3, 3),\n", - " padding='valid',\n", - " #depth_multiplier=depth_multiplier,\n", - " strides=strides,\n", - " use_bias=False)) \n", - " model.add(BatchNormalization(axis=channel_axis))\n", - " \n", - " model.add(Activation('relu'))\n", - " model.add(Conv2D(pointwise_conv_filters, (1, 1),\n", - " padding='same',\n", - " use_bias=False,\n", - " strides=(1, 1)))\n", - " model.add(BatchNormalization(axis=channel_axis))\n", - " model.add(Activation('relu'))\n", - "\n", - "\n", - " _conv_block(32, alpha, strides=(1, 1))\n", - " \n", - " _depthwise_conv_block(64, alpha, depth_multiplier)\n", - " \n", - " _depthwise_conv_block(128, alpha, depth_multiplier,\n", - " strides=(2, 2))\n", - " _depthwise_conv_block(128, alpha, depth_multiplier)\n", - " model.add(Dropout(rate=0.5))\n", - "\n", - " _depthwise_conv_block(256, alpha, depth_multiplier, \n", - " strides=(2, 2))\n", - " _depthwise_conv_block(256, alpha, depth_multiplier)\n", - " model.add(Dropout(rate=0.5))\n", - "\n", - " _depthwise_conv_block(512, alpha, depth_multiplier,\n", - " strides=(2, 2))\n", - " _depthwise_conv_block(512, alpha, depth_multiplier)\n", - " _depthwise_conv_block(512, alpha, depth_multiplier)\n", - " model.add(Dropout(rate=0.5))\n", - " \n", - " _depthwise_conv_block(512, alpha, depth_multiplier)\n", - " _depthwise_conv_block(512, alpha, depth_multiplier)\n", - " _depthwise_conv_block(512, alpha, depth_multiplier)\n", - " 
model.add(Dropout(rate=0.5))\n", - " \n", - " _depthwise_conv_block(1024, alpha, depth_multiplier,\n", - " strides=(2, 2))\n", - " _depthwise_conv_block(1024, alpha, depth_multiplier)\n", - " model.add(Dropout(rate=0.5))\n", - "\n", - " model.add(AveragePooling2D(pool_size=2))\n", - " model.add(Flatten())\n", - " model.add(Dense(10, activation='softmax'))\n", - "\n", - " return model" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Epoch 1/50\n", - "391/391 [==============================] - 36s 92ms/step - loss: 2.0463 - acc: 0.2513 - val_loss: 2.2584 - val_acc: 0.2776\n", - "Epoch 2/50\n", - "391/391 [==============================] - 31s 80ms/step - loss: 1.6583 - acc: 0.3977 - val_loss: 1.9809 - val_acc: 0.3528\n", - "Epoch 3/50\n", - "391/391 [==============================] - 32s 81ms/step - loss: 1.4632 - acc: 0.4740 - val_loss: 1.6017 - val_acc: 0.4475\n", - "Epoch 4/50\n", - "391/391 [==============================] - 32s 82ms/step - loss: 1.3280 - acc: 0.5213 - val_loss: 1.3650 - val_acc: 0.5158\n", - "Epoch 5/50\n", - "391/391 [==============================] - 32s 82ms/step - loss: 1.1912 - acc: 0.5727 - val_loss: 1.2064 - val_acc: 0.5807\n", - "Epoch 6/50\n", - "391/391 [==============================] - 31s 80ms/step - loss: 1.0700 - acc: 0.6203 - val_loss: 1.1014 - val_acc: 0.6085\n", - "Epoch 7/50\n", - "391/391 [==============================] - 31s 80ms/step - loss: 1.0232 - acc: 0.6350 - val_loss: 1.0276 - val_acc: 0.6331\n", - "Epoch 8/50\n", - "391/391 [==============================] - 31s 79ms/step - loss: 0.9783 - acc: 0.6498 - val_loss: 1.0276 - val_acc: 0.6398\n", - "Epoch 9/50\n", - "391/391 [==============================] - 31s 78ms/step - loss: 0.9359 - acc: 0.6673 - val_loss: 1.0931 - val_acc: 0.6226\n", - "Epoch 10/50\n", - "391/391 [==============================] - 30s 76ms/step - loss: 0.9058 - acc: 0.6776 - val_loss: 1.1184 - val_acc: 0.6105\n", - "Epoch 11/50\n", - "391/391 [==============================] - 30s 76ms/step - loss: 0.8781 - acc: 0.6884 - val_loss: 1.0405 - val_acc: 0.6498\n", - "Epoch 12/50\n", - "391/391 [==============================] - 29s 74ms/step - loss: 0.8502 - acc: 0.6998 - val_loss: 1.3313 - val_acc: 0.5514\n", - "Epoch 13/50\n", - "391/391 [==============================] - 29s 75ms/step - loss: 0.8211 - acc: 0.7077 - val_loss: 1.0371 - val_acc: 0.6443\n", - "Epoch 14/50\n", - "391/391 [==============================] - 28s 73ms/step - loss: 0.7958 - acc: 0.7181 - val_loss: 0.8624 - val_acc: 0.7061\n", - "Epoch 15/50\n", - "391/391 [==============================] - 29s 74ms/step - loss: 0.7720 - acc: 0.7257 - val_loss: 0.8090 - val_acc: 0.7224\n", - "Epoch 16/50\n", - "391/391 [==============================] - 29s 74ms/step - loss: 0.7508 - acc: 0.7324 - val_loss: 0.7846 - val_acc: 0.7272\n", - "Epoch 17/50\n", - "391/391 [==============================] - 30s 76ms/step - loss: 0.7292 - acc: 0.7432 - val_loss: 0.7717 - val_acc: 0.7323\n", - "Epoch 18/50\n", - "391/391 [==============================] - 33s 85ms/step - loss: 0.7047 - acc: 0.7511 - val_loss: 0.7407 - val_acc: 0.7453\n", - "Epoch 19/50\n", - "391/391 [==============================] - 33s 85ms/step - loss: 0.6884 - acc: 0.7570 - val_loss: 0.7040 - val_acc: 0.7537\n", - "Epoch 20/50\n", - "391/391 [==============================] - 33s 85ms/step - loss: 0.6718 - acc: 0.7652 - val_loss: 0.9103 - val_acc: 0.6907\n", - "Epoch 21/50\n", - "391/391 
[==============================] - 33s 85ms/step - loss: 0.6489 - acc: 0.7722 - val_loss: 0.7054 - val_acc: 0.7567\n", - "Epoch 22/50\n", - "391/391 [==============================] - 33s 84ms/step - loss: 0.6330 - acc: 0.7771 - val_loss: 0.7032 - val_acc: 0.7587\n", - "Epoch 23/50\n", - "391/391 [==============================] - 33s 86ms/step - loss: 0.6157 - acc: 0.7831 - val_loss: 0.6955 - val_acc: 0.7652\n", - "Epoch 24/50\n", - "391/391 [==============================] - 32s 83ms/step - loss: 0.6027 - acc: 0.7882 - val_loss: 0.7599 - val_acc: 0.7419\n", - "Epoch 25/50\n", - "391/391 [==============================] - 32s 83ms/step - loss: 0.5910 - acc: 0.7932 - val_loss: 0.6315 - val_acc: 0.7830\n", - "Epoch 26/50\n", - "391/391 [==============================] - 33s 84ms/step - loss: 0.5743 - acc: 0.7967 - val_loss: 0.6595 - val_acc: 0.7772\n", - "Epoch 27/50\n", - "391/391 [==============================] - 32s 82ms/step - loss: 0.5610 - acc: 0.8027 - val_loss: 0.6628 - val_acc: 0.7691\n", - "Epoch 28/50\n", - "391/391 [==============================] - 31s 80ms/step - loss: 0.5508 - acc: 0.8074 - val_loss: 0.6211 - val_acc: 0.7820\n", - "Epoch 29/50\n", - "391/391 [==============================] - 31s 81ms/step - loss: 0.5293 - acc: 0.8130 - val_loss: 0.6221 - val_acc: 0.7852\n", - "Epoch 30/50\n", - "391/391 [==============================] - 30s 78ms/step - loss: 0.5205 - acc: 0.8158 - val_loss: 0.6252 - val_acc: 0.7891\n", - "Epoch 31/50\n", - "391/391 [==============================] - 31s 79ms/step - loss: 0.5128 - acc: 0.8195 - val_loss: 0.6213 - val_acc: 0.7844\n", - "Epoch 32/50\n", - "391/391 [==============================] - 29s 75ms/step - loss: 0.4988 - acc: 0.8241 - val_loss: 0.5745 - val_acc: 0.8046\n", - "Epoch 33/50\n", - "391/391 [==============================] - 30s 76ms/step - loss: 0.4898 - acc: 0.8273 - val_loss: 0.5938 - val_acc: 0.8015\n", - "Epoch 34/50\n", - "391/391 [==============================] - 28s 72ms/step - loss: 0.4789 - acc: 0.8294 - val_loss: 0.5693 - val_acc: 0.8074\n", - "Epoch 35/50\n", - "391/391 [==============================] - 28s 72ms/step - loss: 0.4646 - acc: 0.8371 - val_loss: 0.5743 - val_acc: 0.8042\n", - "Epoch 36/50\n", - "391/391 [==============================] - 29s 74ms/step - loss: 0.4605 - acc: 0.8364 - val_loss: 0.6270 - val_acc: 0.7865\n", - "Epoch 37/50\n", - "391/391 [==============================] - 33s 84ms/step - loss: 0.4499 - acc: 0.8405 - val_loss: 0.6014 - val_acc: 0.7960\n", - "Epoch 38/50\n", - "391/391 [==============================] - 32s 83ms/step - loss: 0.4459 - acc: 0.8438 - val_loss: 0.6058 - val_acc: 0.7971\n", - "Epoch 39/50\n", - "391/391 [==============================] - 33s 84ms/step - loss: 0.4341 - acc: 0.8473 - val_loss: 0.5551 - val_acc: 0.8125\n", - "Epoch 40/50\n", - "391/391 [==============================] - 33s 83ms/step - loss: 0.4238 - acc: 0.8496 - val_loss: 0.5445 - val_acc: 0.8165\n", - "Epoch 41/50\n", - "391/391 [==============================] - 33s 83ms/step - loss: 0.4160 - acc: 0.8516 - val_loss: 0.5873 - val_acc: 0.8058\n", - "Epoch 42/50\n", - "391/391 [==============================] - 32s 83ms/step - loss: 0.4048 - acc: 0.8546 - val_loss: 0.6693 - val_acc: 0.7822\n", - "Epoch 43/50\n", - "391/391 [==============================] - 33s 83ms/step - loss: 0.4021 - acc: 0.8583 - val_loss: 0.5408 - val_acc: 0.8168\n", - "Epoch 44/50\n", - "391/391 [==============================] - 33s 84ms/step - loss: 0.3927 - acc: 0.8610 - val_loss: 0.5538 - val_acc: 0.8117\n", - "Epoch 
45/50\n", - "391/391 [==============================] - 33s 83ms/step - loss: 0.3904 - acc: 0.8615 - val_loss: 0.5981 - val_acc: 0.8063\n", - "Epoch 46/50\n", - "391/391 [==============================] - 32s 81ms/step - loss: 0.3811 - acc: 0.8642 - val_loss: 0.5285 - val_acc: 0.8223\n", - "Epoch 47/50\n", - "391/391 [==============================] - 32s 83ms/step - loss: 0.3726 - acc: 0.8676 - val_loss: 0.5317 - val_acc: 0.8206\n", - "Epoch 48/50\n", - "391/391 [==============================] - 31s 79ms/step - loss: 0.3661 - acc: 0.8692 - val_loss: 0.5568 - val_acc: 0.8172\n", - "Epoch 49/50\n", - "391/391 [==============================] - 31s 79ms/step - loss: 0.3582 - acc: 0.8711 - val_loss: 0.5057 - val_acc: 0.8320\n", - "Epoch 50/50\n", - "391/391 [==============================] - 29s 75ms/step - loss: 0.3516 - acc: 0.8740 - val_loss: 0.5760 - val_acc: 0.8099\n", - "_________________________________________________________________\n", - "Layer (type) Output Shape Param # \n", - "=================================================================\n", - "conv2d_1 (Conv2D) (None, 32, 32, 32) 864 \n", - "_________________________________________________________________\n", - "batch_normalization_1 (Batch (None, 32, 32, 32) 128 \n", - "_________________________________________________________________\n", - "activation_1 (Activation) (None, 32, 32, 32) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_1 (ZeroPaddin (None, 32, 34, 34) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_1 (Depthwis (None, 32, 32, 32) 288 \n", - "_________________________________________________________________\n", - "batch_normalization_2 (Batch (None, 32, 32, 32) 128 \n", - "_________________________________________________________________\n", - "activation_2 (Activation) (None, 32, 32, 32) 0 \n", - "_________________________________________________________________\n", - "conv2d_2 (Conv2D) (None, 64, 32, 32) 2048 \n", - "_________________________________________________________________\n", - "batch_normalization_3 (Batch (None, 64, 32, 32) 256 \n", - "_________________________________________________________________\n", - "activation_3 (Activation) (None, 64, 32, 32) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_2 (ZeroPaddin (None, 64, 34, 34) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_2 (Depthwis (None, 64, 16, 16) 576 \n", - "_________________________________________________________________\n", - "batch_normalization_4 (Batch (None, 64, 16, 16) 256 \n", - "_________________________________________________________________\n", - "activation_4 (Activation) (None, 64, 16, 16) 0 \n", - "_________________________________________________________________\n", - "conv2d_3 (Conv2D) (None, 128, 16, 16) 8192 \n", - "_________________________________________________________________\n", - "batch_normalization_5 (Batch (None, 128, 16, 16) 512 \n", - "_________________________________________________________________\n", - "activation_5 (Activation) (None, 128, 16, 16) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_3 (ZeroPaddin (None, 128, 18, 18) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_3 (Depthwis (None, 128, 16, 16) 1152 \n", - "_________________________________________________________________\n", - 
"batch_normalization_6 (Batch (None, 128, 16, 16) 512 \n", - "_________________________________________________________________\n", - "activation_6 (Activation) (None, 128, 16, 16) 0 \n", - "_________________________________________________________________\n", - "conv2d_4 (Conv2D) (None, 128, 16, 16) 16384 \n", - "_________________________________________________________________\n", - "batch_normalization_7 (Batch (None, 128, 16, 16) 512 \n", - "_________________________________________________________________\n", - "activation_7 (Activation) (None, 128, 16, 16) 0 \n", - "_________________________________________________________________\n", - "dropout_1 (Dropout) (None, 128, 16, 16) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_4 (ZeroPaddin (None, 128, 18, 18) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_4 (Depthwis (None, 128, 8, 8) 1152 \n", - "_________________________________________________________________\n", - "batch_normalization_8 (Batch (None, 128, 8, 8) 512 \n", - "_________________________________________________________________\n", - "activation_8 (Activation) (None, 128, 8, 8) 0 \n", - "_________________________________________________________________\n", - "conv2d_5 (Conv2D) (None, 256, 8, 8) 32768 \n", - "_________________________________________________________________\n", - "batch_normalization_9 (Batch (None, 256, 8, 8) 1024 \n", - "_________________________________________________________________\n", - "activation_9 (Activation) (None, 256, 8, 8) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_5 (ZeroPaddin (None, 256, 10, 10) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_5 (Depthwis (None, 256, 8, 8) 2304 \n", - "_________________________________________________________________\n", - "batch_normalization_10 (Batc (None, 256, 8, 8) 1024 \n", - "_________________________________________________________________\n", - "activation_10 (Activation) (None, 256, 8, 8) 0 \n", - "_________________________________________________________________\n", - "conv2d_6 (Conv2D) (None, 256, 8, 8) 65536 \n", - "_________________________________________________________________\n", - "batch_normalization_11 (Batc (None, 256, 8, 8) 1024 \n", - "_________________________________________________________________\n", - "activation_11 (Activation) (None, 256, 8, 8) 0 \n", - "_________________________________________________________________\n", - "dropout_2 (Dropout) (None, 256, 8, 8) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_6 (ZeroPaddin (None, 256, 10, 10) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_6 (Depthwis (None, 256, 4, 4) 2304 \n", - "_________________________________________________________________\n", - "batch_normalization_12 (Batc (None, 256, 4, 4) 1024 \n", - "_________________________________________________________________\n", - "activation_12 (Activation) (None, 256, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_7 (Conv2D) (None, 512, 4, 4) 131072 \n", - "_________________________________________________________________\n", - "batch_normalization_13 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_13 (Activation) (None, 512, 4, 4) 0 \n", - 
"_________________________________________________________________\n", - "zero_padding2d_7 (ZeroPaddin (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_7 (Depthwis (None, 512, 4, 4) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_14 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_14 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_8 (Conv2D) (None, 512, 4, 4) 262144 \n", - "_________________________________________________________________\n", - "batch_normalization_15 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_15 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_8 (ZeroPaddin (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_8 (Depthwis (None, 512, 4, 4) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_16 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_16 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_9 (Conv2D) (None, 512, 4, 4) 262144 \n", - "_________________________________________________________________\n", - "batch_normalization_17 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_17 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "dropout_3 (Dropout) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_9 (ZeroPaddin (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_9 (Depthwis (None, 512, 4, 4) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_18 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_18 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_10 (Conv2D) (None, 512, 4, 4) 262144 \n", - "_________________________________________________________________\n", - "batch_normalization_19 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_19 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_10 (ZeroPaddi (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_10 (Depthwi (None, 512, 4, 4) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_20 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_20 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_11 (Conv2D) (None, 512, 4, 4) 262144 \n", - 
"_________________________________________________________________\n", - "batch_normalization_21 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_21 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_11 (ZeroPaddi (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_11 (Depthwi (None, 512, 4, 4) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_22 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_22 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "conv2d_12 (Conv2D) (None, 512, 4, 4) 262144 \n", - "_________________________________________________________________\n", - "batch_normalization_23 (Batc (None, 512, 4, 4) 2048 \n", - "_________________________________________________________________\n", - "activation_23 (Activation) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "dropout_4 (Dropout) (None, 512, 4, 4) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_12 (ZeroPaddi (None, 512, 6, 6) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_12 (Depthwi (None, 512, 2, 2) 4608 \n", - "_________________________________________________________________\n", - "batch_normalization_24 (Batc (None, 512, 2, 2) 2048 \n", - "_________________________________________________________________\n", - "activation_24 (Activation) (None, 512, 2, 2) 0 \n", - "_________________________________________________________________\n", - "conv2d_13 (Conv2D) (None, 1024, 2, 2) 524288 \n", - "_________________________________________________________________\n", - "batch_normalization_25 (Batc (None, 1024, 2, 2) 4096 \n", - "_________________________________________________________________\n", - "activation_25 (Activation) (None, 1024, 2, 2) 0 \n", - "_________________________________________________________________\n", - "zero_padding2d_13 (ZeroPaddi (None, 1024, 4, 4) 0 \n", - "_________________________________________________________________\n", - "depthwise_conv2d_13 (Depthwi (None, 1024, 2, 2) 9216 \n", - "_________________________________________________________________\n", - "batch_normalization_26 (Batc (None, 1024, 2, 2) 4096 \n", - "_________________________________________________________________\n", - "activation_26 (Activation) (None, 1024, 2, 2) 0 \n", - "_________________________________________________________________\n", - "conv2d_14 (Conv2D) (None, 1024, 2, 2) 1048576 \n", - "_________________________________________________________________\n", - "batch_normalization_27 (Batc (None, 1024, 2, 2) 4096 \n", - "_________________________________________________________________\n", - "activation_27 (Activation) (None, 1024, 2, 2) 0 \n", - "_________________________________________________________________\n", - "dropout_5 (Dropout) (None, 1024, 2, 2) 0 \n", - "_________________________________________________________________\n", - "average_pooling2d_1 (Average (None, 1024, 1, 1) 0 \n", - "_________________________________________________________________\n", - "flatten_1 (Flatten) (None, 1024) 0 \n", - 
"_________________________________________________________________\n", - "dense_1 (Dense) (None, 10) 10250 \n", - "=================================================================\n", - "Total params: 3,239,114\n", - "Trainable params: 3,217,226\n", - "Non-trainable params: 21,888\n", - "_________________________________________________________________\n" - ] - } - ], - "source": [ - "# data augmentation, horizontal flips only\n", - "datagen = ImageDataGenerator(\n", - " featurewise_center=False,\n", - " featurewise_std_normalization=False,\n", - " rotation_range=0.0,\n", - " width_shift_range=0.0,\n", - " height_shift_range=0.0,\n", - " vertical_flip=False,\n", - " horizontal_flip=True)\n", - "datagen.fit(X_train)\n", - "\n", - "model = get_mobilenet()\n", - "\n", - "learning_rates=[]\n", - "for i in range(5):\n", - " learning_rates.append(2e-2)\n", - "for i in range(50-5):\n", - " learning_rates.append(1e-2)\n", - "for i in range(100-50):\n", - " learning_rates.append(8e-3)\n", - "for i in range(150-100):\n", - " learning_rates.append(4e-3)\n", - "for i in range(200-150):\n", - " learning_rates.append(2e-3)\n", - "for i in range(300-200):\n", - " learning_rates.append(1e-3)\n", - "\n", - "callbacks = [\n", - " LearningRateScheduler(lambda epoch: float(learning_rates[epoch]))\n", - "]\n", - "\n", - "model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), \n", - " loss='categorical_crossentropy', \n", - " metrics=['accuracy'])\n", - "\n", - "model.fit_generator(\n", - " datagen.flow(X_train, y_train, batch_size=128),\n", - " steps_per_epoch=int(np.ceil(50000 / 128)),\n", - " validation_data=(X_test, y_test),\n", - " #epochs=300,\n", - " epochs=50,\n", - " callbacks=callbacks\n", - ")\n", - "\n", - "model.summary()\n" - ] - }, - { - "cell_type": "code", - "execution_count": 46, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 0.012634992599487305\n" - ] - } - ], - "source": [ - "import time\n", - "start_time = time.time()\n", - "keras_result = model.predict(X_test[:20])\n", - "print(\"time: \", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": 18, - "metadata": {}, - "outputs": [], - "source": [ - "import keras2onnx\n", - "onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", - "import onnx\n", - "onnx.save(onnx_model, \"../models/keras/mobilenet.onnx\")\n", - "import pickle\n", - "with open('dumps/mobilenet_keras_dump', 'wb') as fp:\n", - " pickle.dump(keras_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/mobilenet_onnx.ipynb b/hpvm/projects/onnx/src/mobilenet_onnx.ipynb deleted file mode 100644 index 8b26d865421ca5c819b32abeb295b0169254a055..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/mobilenet_onnx.ipynb +++ /dev/null @@ -1,210 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": 
"stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import sys\n", - "import keras\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "from keras.utils import to_categorical\n", - "import numpy as np\n", - "import os\n", - "import keras2onnx\n", - "import onnx\n", - "import onnxruntime" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "X_train.shape = (50000, 3, 32, 32)\n", - "X_test.shape = (10000, 3, 32, 32)\n" - ] - } - ], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", - "test_labels = y_test\n", - "\n", - "print (\"X_train.shape = \", X_train.shape)\n", - "print (\"X_test.shape = \", X_test.shape)\n", - "\n", - "\n", - "X_train = X_train.astype('float32')\n", - "X_test = X_test.astype('float32')\n", - "\n", - "\n", - "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", - "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", - "\n", - "X_train = (X_train - mean) / (std + 1e-9)\n", - "X_test = (X_test - mean) / (std + 1e-9)\n", - "\n", - "y_train = to_categorical(y_train, num_classes=10)\n", - "y_test = to_categorical(y_test, num_classes=10)" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [], - "source": [ - "sess = onnxruntime.InferenceSession(\"../models/keras/mobilenet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input name : conv2d_1_input\n", - "Input shape : [None, 3, 32, 32]\n", - "Input type : tensor(float)\n" - ] - } - ], - "source": [ - "input_name = sess.get_inputs()[0].name\n", - "print(\"Input name :\", input_name)\n", - "input_shape = sess.get_inputs()[0].shape\n", - "print(\"Input shape :\", input_shape)\n", - "input_type = sess.get_inputs()[0].type\n", - "print(\"Input type :\", input_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Output name : dense_1\n", - "Output shape : [None, 10]\n", - "Output type : tensor(float)\n" - ] - } - ], - "source": [ - "output_name = sess.get_outputs()[0].name\n", - "print(\"Output name :\", output_name) \n", - "output_shape = sess.get_outputs()[0].shape\n", - "print(\"Output shape :\", output_shape)\n", - "output_type = sess.get_outputs()[0].type\n", - "print(\"Output type :\", output_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 70, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 0.006893157958984375\n" - ] - } - ], - "source": [ - "import time\n", - "start_time = time.time()\n", - "ort_result = sess.run([output_name], {input_name: X_test[:40]})\n", - "print(\"time: \", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "with open('dumps/mobilenet_ort_dump', 'wb') as fp:\n", - " pickle.dump(ort_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 11, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy 
as np\n", - "with open ('dumps/mobilenet_ort_dump', 'rb') as fp:\n", - " ort_res = pickle.load(fp)\n", - "with open ('dumps/mobilenet_keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(ort_res[0], keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/mobilenet_tvm.ipynb b/hpvm/projects/onnx/src/mobilenet_tvm.ipynb deleted file mode 100644 index c0b71588afdea134ed4a2aa675d2c6058772b0b0..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/mobilenet_tvm.ipynb +++ /dev/null @@ -1,381 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "import sys\n", - "import struct\n", - "import numpy as np\n", - "import os\n", - "import tvm\n", - "import tvm.relay as relay\n", - "from tvm.contrib import graph_runtime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "(X_train, y_train), (X_test, y_test) = cifar10.load_data()\n", - "X_test = X_test.astype('float32')\n", - "mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True)\n", - "std = np.std(X_train, axis=(0, 2, 3), keepdims=True)\n", - "\n", - "X_test = (X_test - mean) / (std + 1e-9)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#print(X_test.shape)\n", - "#X_test = X_test[:8000]\n", - "print(X_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [], - "source": [ - "#import keras\n", - "#model = keras.applications.mobilenet.MobileNet(input_shape=X_test[0].shape, include_top=False, weights=None)\n", - "#import keras2onnx\n", - "#onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)\n", - "import onnx\n", - "onnx_model = onnx.load(\"../models/keras/mobilenet.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target='cuda -libs=cudnn,cublas'\n", - "input_name = 'conv2d_1_input'\n", - "shape_dict = {input_name: X_test[:10].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "ctx = tvm.gpu()\n", - "with relay.build_config(opt_level=3):\n", - " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# LLVM EXECUTE SUCCEEDED\n", - "import time\n", - "start_time = time.time()\n", - "tvm_out = executor.evaluate()(tvm.nd.array(X_test[:1000]), **params)\n", - 
"print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", - "import pickle\n", - "with open('dumps/tvm_dump', 'wb') as fp:\n", - " pickle.dump(tvm_output, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 73, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in 
relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute momentum is ignored in relay.sym.batch_norm\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.avg_pool2d\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (40, 1024, 'float32'), (10, 1024, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 1024, 2, 2, 'float32'), (1024, 1024, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 1024, 2, 2, 'float32'), (1024, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 2, 2, 'float32'), (1024, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 512, 4, 4, 'float32'), (512, 512, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 512, 4, 4, 'float32'), (512, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 4, 4, 'float32'), (512, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 256, 8, 8, 'float32'), (256, 256, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 256, 8, 8, 'float32'), (256, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 8, 8, 'float32'), (256, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 128, 16, 16, 'float32'), (128, 128, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 128, 16, 16, 'float32'), (128, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 64, 16, 16, 'float32'), (128, 64, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 64, 32, 32, 'float32'), (64, 1, 3, 3, 'float32'), (2, 2), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 32, 32, 32, 'float32'), (64, 32, 1, 1, 'float32'), (1, 1), (0, 0), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('depthwise_conv2d_nchw', (40, 32, 32, 32, 'float32'), (32, 1, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (40, 3, 32, 32, 'float32'), (32, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" - ] - } - ], - "source": [ - "input_name = 'conv2d_1_input'\n", - "input_size = 40\n", - "shape_dict = {input_name: X_test[:input_size].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "target = 'cuda -libs=cudnn,cublas'\n", - "with relay.build_config(opt_level=3):\n", - " graph, lib, params = relay.build(mod, target, params=params)" - ] - }, - { - "cell_type": "code", - "execution_count": 74, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "ctx = tvm.gpu()\n", - "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", - "# create module\n", - "module = graph_runtime.create(graph, lib, ctx)\n", - "# set input and parameters\n", - "module.set_input(\"conv2d_1_input\", X_test[:input_size])\n", - "module.set_input(**params)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 86, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time: 0.012343168258666992\n" - ] - } - ], - "source": [ - "# run\n", - "start_time = time.time()\n", - "module.run()\n", - "out_shape = (input_size, 10)\n", - "# get output\n", - "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", - "print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": 25, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "mobilenet 17.814301 ms (0.00 ms)\n" - ] - } - ], - "source": [ - "# evaluate\n", - "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n", - "prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond\n", - "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.6f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/tvm_dump', 'rb') as fp:\n", - " tvm_res = pickle.load(fp)\n", - "with open ('dumps/keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(tvm_res, keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test\n", - "print(\"Accuracy matched!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Use TVM's implementation of mobilenet\n", - "import sys\n", - "import struct\n", - "import numpy as np\n", - "import os\n", - "import tvm\n", - "import tvm.relay as relay\n", - "from tvm.relay import testing\n", - "from tvm.contrib import graph_runtime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "scrolled": true - }, - "outputs": [], - "source": [ - "batch_size = 100\n", - "dtype = 'float32'\n", - "target='cuda -libs=cudnn,cublas'\n", - "net, params = testing.mobilenet.get_workload(batch_size=batch_size, dtype=dtype)\n", - "input_shape = (batch_size, 3, 224, 224)\n", - "output_shape = (batch_size, 1000)\n", - "with relay.build_config(opt_level=3):\n", - " graph, lib, params = relay.build(net, target=target, params=params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "ctx = tvm.context(str(target), 0)\n", - "module = graph_runtime.create(graph, lib, ctx)\n", - "data_tvm = 
tvm.nd.array((np.random.uniform(size=input_shape)).astype(dtype))\n", - "module.set_input('data', data_tvm)\n", - "module.set_input(**params)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# evaluate\n", - "ftimer = module.module.time_evaluator(\"run\", ctx, number=1)\n", - "prof_res = np.array(ftimer().results) * 1000 # multiply 1000 for converting to millisecond\n", - "print(\"%-20s %-19s (%s)\" % (\"mobilenet\", \"%.2f ms\" % np.mean(prof_res), \"%.2f ms\" % np.std(prof_res)))" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/vgg16_keras.ipynb b/hpvm/projects/onnx/src/vgg16_keras.ipynb deleted file mode 100644 index e1d1422ead3c3bc5afa602a13b3087ecbebc8eab..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/vgg16_keras.ipynb +++ /dev/null @@ -1,392 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import keras\n", - "from keras.datasets import cifar10\n", - "from keras.preprocessing.image import ImageDataGenerator\n", - "from keras.models import Sequential\n", - "from keras.layers import Dense, Dropout, Activation, Flatten\n", - "from keras.layers import Conv2D, MaxPooling2D, BatchNormalization\n", - "from keras import optimizers\n", - "import numpy as np\n", - "from keras.layers.core import Lambda\n", - "from keras import backend as K\n", - "from keras import regularizers\n", - "import os\n", - "import sys" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "class cifar10vgg:\n", - " def __init__(self,train=True):\n", - " self.num_classes = 10\n", - " self.weight_decay = 0.0005\n", - " self.x_shape = [3,32,32]\n", - "\n", - " self.model = self.build_model()\n", - " if train:\n", - " self.model = self.train(self.model)\n", - " else:\n", - " self.model.load_weights('cifar10vgg.h5')\n", - "\n", - "\n", - " def build_model(self):\n", - " # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper.\n", - "\n", - " model = Sequential()\n", - " weight_decay = self.weight_decay\n", - "\n", - " model.add(Conv2D(64, (3, 3), padding='same',\n", - " input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " #model.add(BatchNormalization())\n", - " model.add(Dropout(0.3))\n", - "\n", - " model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(MaxPooling2D(pool_size=(2, 2)))\n", - "\n", - " model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " #model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " 
model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " #model.add(BatchNormalization())\n", - " model.add(MaxPooling2D(pool_size=(2, 2)))\n", - "\n", - " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " #model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(MaxPooling2D(pool_size=(2, 2)))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(MaxPooling2D(pool_size=(2, 2)))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(Dropout(0.4))\n", - "\n", - " model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - " model.add(MaxPooling2D(pool_size=(2, 2)))\n", - " model.add(Dropout(0.5))\n", - "\n", - " model.add(Flatten())\n", - " model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))\n", - " model.add(Activation('relu'))\n", - " # model.add(BatchNormalization())\n", - "\n", - " model.add(Dropout(0.5))\n", - " model.add(Dense(self.num_classes))\n", - " model.add(Activation('softmax'))\n", - " return model\n", - "\n", - "\n", - " def normalize(self,X_train,X_test):\n", - " #this function normalize inputs for zero mean and unit variance\n", - " # it is used when training a model.\n", - " # Input: training set and test set\n", - " # Output: normalized training set and test set according to the trianing set statistics.\n", - " mean = np.mean(X_train,axis=(0,1,2,3))\n", - " std = np.std(X_train, axis=(0, 1, 2, 3))\n", - " X_train = (X_train-mean)/(std+1e-7)\n", - " X_test = (X_test-mean)/(std+1e-7)\n", - " return X_train, X_test\n", - "\n", - " \n", - " def normalize_production(self,x):\n", - " #this function is used to normalize instances in production according to saved training set statistics\n", - " # Input: X - a training set\n", - " # Output X - a normalized training set according to normalization constants.\n", - "\n", - " #these values produced during first training and 
are general for the standard cifar10 training set normalization\n", - " mean = 120.707\n", - " std = 64.15\n", - " return (x-mean)/(std+1e-7)\n", - "\n", - " \n", - " def predict(self,x,normalize=True,batch_size=50):\n", - " if normalize:\n", - " x = self.normalize_production(x)\n", - " return self.model.predict(x,batch_size)\n", - "\n", - " \n", - " def train(self,model):\n", - "\n", - " #training parameters\n", - " batch_size = 128\n", - " #maxepoches = 250\n", - " #maxepoches = 250\n", - " maxepoches = 30\n", - " learning_rate = 0.01\n", - " lr_decay = 1e-6\n", - " lr_drop = 20\n", - " # The data, shuffled and split between train and test sets:\n", - " (x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", - " x_train = x_train.astype('float32')\n", - " x_test = x_test.astype('float32')\n", - " x_train, x_test = self.normalize(x_train, x_test)\n", - "\n", - " y_train = keras.utils.to_categorical(y_train, self.num_classes)\n", - " y_test = keras.utils.to_categorical(y_test, self.num_classes)\n", - "\n", - " def lr_scheduler(epoch):\n", - " return learning_rate * (0.5 ** (epoch // lr_drop))\n", - " reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)\n", - "\n", - " #data augmentation\n", - " datagen = ImageDataGenerator(\n", - " featurewise_center=False, # set input mean to 0 over the dataset\n", - " samplewise_center=False, # set each sample mean to 0\n", - " featurewise_std_normalization=False, # divide inputs by std of the dataset\n", - " samplewise_std_normalization=False, # divide each input by its std\n", - " zca_whitening=False, # apply ZCA whitening\n", - " rotation_range=15, # randomly rotate images in the range (degrees, 0 to 180)\n", - " width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)\n", - " height_shift_range=0.1, # randomly shift images vertically (fraction of total height)\n", - " horizontal_flip=True, # randomly flip images\n", - " vertical_flip=False) # randomly flip images\n", - " # (std, mean, and principal components if ZCA whitening is applied).\n", - " datagen.fit(x_train)\n", - "\n", - "\n", - "\n", - " #optimization details\n", - " sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)\n", - " model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])\n", - "\n", - "\n", - " # training process in a for loop with learning rate drop every 25 epoches.\n", - "\n", - " historytemp = model.fit_generator(datagen.flow(x_train, y_train,\n", - " batch_size=batch_size),\n", - " steps_per_epoch=x_train.shape[0] // batch_size,\n", - " epochs=maxepoches,\n", - " validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2)\n", - " \n", - " model.save_weights('cifar10vgg.h5')\n", - " return model\n", - "\n" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "WARNING:tensorflow:Variable *= will be deprecated. 
Use `var.assign(var * other)` if you want assignment to the variable value or `x = x * y` if you want a new python Tensor object.\n", - "Epoch 1/30\n", - " - 29s - loss: 4.3002 - acc: 0.1547 - val_loss: 4.2265 - val_acc: 0.1443\n", - "Epoch 2/30\n", - " - 23s - loss: 3.9048 - acc: 0.2016 - val_loss: 3.9923 - val_acc: 0.1640\n", - "Epoch 3/30\n", - " - 23s - loss: 3.7052 - acc: 0.2210 - val_loss: 3.8900 - val_acc: 0.1712\n", - "Epoch 4/30\n", - " - 22s - loss: 3.4890 - acc: 0.2839 - val_loss: 3.4656 - val_acc: 0.2731\n", - "Epoch 5/30\n", - " - 25s - loss: 3.2615 - acc: 0.3258 - val_loss: 3.1615 - val_acc: 0.3296\n", - "Epoch 6/30\n", - " - 23s - loss: 3.0814 - acc: 0.3557 - val_loss: 3.0238 - val_acc: 0.3395\n", - "Epoch 7/30\n", - " - 23s - loss: 2.9114 - acc: 0.3883 - val_loss: 2.8283 - val_acc: 0.4041\n", - "Epoch 8/30\n", - " - 23s - loss: 2.7543 - acc: 0.4239 - val_loss: 2.5891 - val_acc: 0.4622\n", - "Epoch 9/30\n", - " - 22s - loss: 2.6056 - acc: 0.4594 - val_loss: 2.5562 - val_acc: 0.4796\n", - "Epoch 10/30\n", - " - 25s - loss: 2.4590 - acc: 0.5013 - val_loss: 2.2543 - val_acc: 0.5714\n", - "Epoch 11/30\n", - " - 24s - loss: 2.3017 - acc: 0.5439 - val_loss: 2.1377 - val_acc: 0.5954\n", - "Epoch 12/30\n", - " - 22s - loss: 2.1519 - acc: 0.5811 - val_loss: 1.9902 - val_acc: 0.6356\n", - "Epoch 13/30\n", - " - 23s - loss: 2.0276 - acc: 0.6106 - val_loss: 1.8868 - val_acc: 0.6494\n", - "Epoch 14/30\n", - " - 22s - loss: 1.9065 - acc: 0.6357 - val_loss: 1.7370 - val_acc: 0.6813\n", - "Epoch 15/30\n", - " - 22s - loss: 1.8081 - acc: 0.6532 - val_loss: 1.7568 - val_acc: 0.6721\n", - "Epoch 16/30\n", - " - 22s - loss: 1.7116 - acc: 0.6736 - val_loss: 1.6687 - val_acc: 0.6959\n", - "Epoch 17/30\n", - " - 22s - loss: 1.6337 - acc: 0.6883 - val_loss: 1.4748 - val_acc: 0.7350\n", - "Epoch 18/30\n", - " - 22s - loss: 1.5609 - acc: 0.7022 - val_loss: 1.4428 - val_acc: 0.7445\n", - "Epoch 19/30\n", - " - 23s - loss: 1.4890 - acc: 0.7160 - val_loss: 1.3682 - val_acc: 0.7582\n", - "Epoch 20/30\n", - " - 22s - loss: 1.4291 - acc: 0.7298 - val_loss: 1.3004 - val_acc: 0.7717\n", - "Epoch 21/30\n", - " - 22s - loss: 1.3169 - acc: 0.7593 - val_loss: 1.2640 - val_acc: 0.7800\n", - "Epoch 22/30\n", - " - 24s - loss: 1.2843 - acc: 0.7669 - val_loss: 1.2191 - val_acc: 0.7864\n", - "Epoch 23/30\n", - " - 23s - loss: 1.2431 - acc: 0.7757 - val_loss: 1.1598 - val_acc: 0.7998\n", - "Epoch 24/30\n", - " - 22s - loss: 1.2166 - acc: 0.7813 - val_loss: 1.1435 - val_acc: 0.8097\n", - "Epoch 25/30\n", - " - 22s - loss: 1.1872 - acc: 0.7869 - val_loss: 1.1180 - val_acc: 0.8123\n", - "Epoch 26/30\n", - " - 23s - loss: 1.1634 - acc: 0.7916 - val_loss: 1.0977 - val_acc: 0.8123\n", - "Epoch 27/30\n", - " - 22s - loss: 1.1300 - acc: 0.7965 - val_loss: 1.0785 - val_acc: 0.8199\n", - "Epoch 28/30\n", - " - 22s - loss: 1.1169 - acc: 0.7983 - val_loss: 1.0927 - val_acc: 0.8120\n", - "Epoch 29/30\n", - " - 22s - loss: 1.0944 - acc: 0.8025 - val_loss: 1.0141 - val_acc: 0.8275\n", - "Epoch 30/30\n", - " - 22s - loss: 1.0686 - acc: 0.8069 - val_loss: 1.0341 - val_acc: 0.8206\n" - ] - } - ], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"1\"\n", - "\n", - "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", - "test_labels = y_test\n", - "train_labels = y_train\n", - "x_train = x_train.astype('float32')\n", - "x_test = x_test.astype('float32')\n", - "\n", - "y_train = keras.utils.to_categorical(y_train, 10)\n", - "y_test = 
keras.utils.to_categorical(y_test, 10)\n", - "\n", - "model = cifar10vgg()\n", - "\n", - "#predicted_x = model.predict(x_test)\n", - "\n", - "#norm_test = model.normalize_production(x_test)\n", - "\n", - "# Normalizing train data before dumping\n", - "#x_train, x_test = model.normalize(x_train, x_test)\n", - "#x_train = model.normalize_production(x_train)\n" - ] - }, - { - "cell_type": "code", - "execution_count": 56, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 1.3477363586425781\n" - ] - } - ], - "source": [ - "import time\n", - "start = time.time()\n", - "result = model.predict(x_test[:8000])\n", - "print(\"time:\", time.time() - start)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import keras2onnx\n", - "onnx_model = keras2onnx.convert_keras(model.model, model.model.name, target_opset=10)\n", - "import onnx\n", - "onnx.save(onnx_model, \"../models/keras/vgg16_cifar10.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "with open('dumps/vgg16_keras_dump', 'wb') as fp:\n", - " pickle.dump(result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/vgg16_onnx.ipynb b/hpvm/projects/onnx/src/vgg16_onnx.ipynb deleted file mode 100644 index 9d0453cfb754a488d06a00afa78383ff9269e326..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/vgg16_onnx.ipynb +++ /dev/null @@ -1,158 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "import keras\n", - "import numpy as np\n", - "import os\n", - "import keras2onnx\n", - "import onnx\n", - "import onnxruntime\n", - "import time\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", - "test_labels = y_test\n", - "train_labels = y_train\n", - "x_train = x_train.astype('float32')\n", - "x_test = x_test.astype('float32')\n", - "\n", - "y_train = keras.utils.to_categorical(y_train, 10)\n", - "y_test = keras.utils.to_categorical(y_test, 10)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Input name : conv2d_1_input\n", - "Input shape : [None, 3, 32, 32]\n", - "Input type : tensor(float)\n", - "Output name : activation_15\n", - "Output shape : [None, 10]\n", - "Output type : tensor(float)\n" - ] - } - ], - "source": [ - "sess = onnxruntime.InferenceSession(\"../models/keras/vgg16_cifar10.onnx\")\n", 
- "input_name = sess.get_inputs()[0].name\n", - "print(\"Input name :\", input_name)\n", - "input_shape = sess.get_inputs()[0].shape\n", - "print(\"Input shape :\", input_shape)\n", - "input_type = sess.get_inputs()[0].type\n", - "print(\"Input type :\", input_type)\n", - "output_name = sess.get_outputs()[0].name\n", - "print(\"Output name :\", output_name) \n", - "output_shape = sess.get_outputs()[0].shape\n", - "print(\"Output shape :\", output_shape)\n", - "output_type = sess.get_outputs()[0].type\n", - "print(\"Output type :\", output_type)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "time: 1.4347426891326904\n" - ] - } - ], - "source": [ - "start_time = time.time()\n", - "ort_result = sess.run([output_name], {input_name: x_test[:8000]})\n", - "print(\"time: \", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "with open('dumps/vgg16_ort_dump', 'wb') as fp:\n", - " pickle.dump(ort_result, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/vgg16_ort_dump', 'rb') as fp:\n", - " ort_res = pickle.load(fp)\n", - "with open ('dumps/vgg16_keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(ort_res[0], keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 1) #using decimal of 3 would pass test" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/src/vgg16_tvm.ipynb b/hpvm/projects/onnx/src/vgg16_tvm.ipynb deleted file mode 100644 index 8cea48d44615e3d407d25b943df6dbb86923a2d0..0000000000000000000000000000000000000000 --- a/hpvm/projects/onnx/src/vgg16_tvm.ipynb +++ /dev/null @@ -1,240 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using TensorFlow backend.\n" - ] - } - ], - "source": [ - "import numpy as np\n", - "from keras.datasets import cifar10\n", - "from keras import backend as K\n", - "import sys\n", - "import struct\n", - "import numpy as np\n", - "import os\n", - "import tvm\n", - "import tvm.relay as relay\n", - "from tvm.contrib import graph_runtime\n", - "os.environ['CUDA_VISIBLE_DEVICES'] = '1'" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "K.set_image_data_format('channels_first')\n", - "\n", - "(x_train, y_train), (x_test, y_test) = cifar10.load_data()\n", - "x_test = x_test.astype('float32')" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "#print(X_test.shape)\n", - "#X_test = X_test[:8000]\n", - "#print(X_test.shape)" - ] - }, - { - "cell_type": "code", - "execution_count": 3, - "metadata": {}, - "outputs": 
[], - "source": [ - "import onnx\n", - "onnx_model = onnx.load(\"../models/keras/vgg16_cifar10.onnx\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "target='cuda -libs=cudnn,cublas'\n", - "input_name = 'conv2d_8_input'\n", - "shape_dict = {input_name: X_test[:1000].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "ctx = tvm.gpu()\n", - "with relay.build_config(opt_level=3):\n", - " executor = relay.build_module.create_executor('graph', mod, ctx, target)\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# LLVM EXECUTE SUCCEEDED\n", - "import time\n", - "start_time = time.time()\n", - "tvm_out = executor.evaluate()(tvm.nd.array(X_test.astype('float32')[:1000]), **params)\n", - "print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top1_tvm = np.argmax(tvm_out.asnumpy()[0])\n", - "import pickle\n", - "with open('dumps/tvm_dump', 'wb') as fp:\n", - " pickle.dump(tvm_output, fp)" - ] - }, - { - "cell_type": "code", - "execution_count": 59, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.conv2d\n", - "WARNING:root:Attribute auto_pad is ignored in relay.sym.max_pool2d\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (10, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('dense', (6000, 512, 'float32'), (512, 512, 'float32'), 0, 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 2, 2, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 512, 4, 4, 'float32'), (512, 512, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 4, 4, 'float32'), (512, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 256, 8, 8, 'float32'), (256, 256, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 8, 8, 'float32'), (256, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 128, 16, 16, 'float32'), (128, 128, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 16, 16, 'float32'), (128, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 64, 32, 32, 'float32'), (64, 64, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). A fallback configuration is used, which may bring great performance regression.\n", - "WARNING:autotvm:Cannot find config for target=cuda -libs=cudnn,cublas, workload=('conv2d', (6000, 3, 32, 32, 'float32'), (64, 3, 3, 3, 'float32'), (1, 1), (1, 1), (1, 1), 'NCHW', 'float32'). 
A fallback configuration is used, which may bring great performance regression.\n" - ] - } - ], - "source": [ - "input_name = 'conv2d_1_input'\n", - "input_size = 6000\n", - "shape_dict = {input_name: x_test[:input_size].shape}\n", - "mod, params = relay.frontend.from_onnx(onnx_model, shape_dict)\n", - "target = 'cuda -libs=cudnn,cublas'\n", - "with relay.build_config(opt_level=3):\n", - " graph, lib, params = relay.build(mod, target, params=params)" - ] - }, - { - "cell_type": "code", - "execution_count": 60, - "metadata": {}, - "outputs": [], - "source": [ - "import time\n", - "ctx = tvm.gpu()\n", - "#data = np.random.uniform(-1, 1, size=data_shape).astype(\"float32\")\n", - "# create module\n", - "module = graph_runtime.create(graph, lib, ctx)\n", - "# set input and parameters\n", - "module.set_input(\"conv2d_1_input\", x_test[:input_size])\n", - "module.set_input(**params)\n", - "# run\n" - ] - }, - { - "cell_type": "code", - "execution_count": 64, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Time: 0.2862420082092285\n" - ] - } - ], - "source": [ - "start_time = time.time()\n", - "module.run()\n", - "out_shape = (input_size, 10)\n", - "# get output\n", - "out = module.get_output(0, tvm.nd.empty(out_shape)).asnumpy()\n", - "print(\"Time:\", time.time() - start_time)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "import pickle\n", - "import numpy as np\n", - "with open ('dumps/tvm_dump', 'rb') as fp:\n", - " tvm_res = pickle.load(fp)\n", - "with open ('dumps/keras_dump', 'rb') as fp:\n", - " keras_res = pickle.load(fp)\n", - "\n", - "for ref_o, o in zip(tvm_res, keras_res):\n", - " np.testing.assert_almost_equal(ref_o, o, 3) #using decimal of 3 would pass test\n", - "print(\"Accuracy matched!\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.6.6" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/hpvm/projects/onnx/.gitignore b/hpvm/projects/onnx_frontend/.gitignore similarity index 100% rename from hpvm/projects/onnx/.gitignore rename to hpvm/projects/onnx_frontend/.gitignore diff --git a/hpvm/projects/onnx/frontend/README.md b/hpvm/projects/onnx_frontend/README.md similarity index 56% rename from hpvm/projects/onnx/frontend/README.md rename to hpvm/projects/onnx_frontend/README.md index 69ffe81f5ad6b1a57dda1a6410d4348ff616148a..719abc5e3c2346fd7bc68972ee4f9035af307db1 100644 --- a/hpvm/projects/onnx/frontend/README.md +++ b/hpvm/projects/onnx_frontend/README.md @@ -1,3 +1,17 @@ +## Importing Conda Environment: + +conda env create -f onnx\_environment.yml + +## Activate/deactivate Conda Environment + +conda activate onnx\_frontend + +## Building and Installing Frontend for ONNX: + +python setup.py build + +python setup.py install + ### How to Run ``` python main.py @@ -7,4 +21,5 @@ Set all your config, e.g. onnx model location, input size and emit directory for ### Resources 1. [ONNX overview](https://github.com/onnx/onnx/blob/master/docs/IR.md) 2. 
[ONNX operator specs](https://github.com/onnx/onnx/blob/master/docs/Operators.md)
-3. [Conversion between models - available adapters](https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21)
\ No newline at end of file
+3. [Conversion between models - available adapters](https://github.com/onnx/onnx/blob/master/onnx/version_converter.py#L21)
+4.
\ No newline at end of file
diff --git a/hpvm/projects/onnx/TODO.md b/hpvm/projects/onnx_frontend/TODO.md
similarity index 100%
rename from hpvm/projects/onnx/TODO.md
rename to hpvm/projects/onnx_frontend/TODO.md
diff --git a/hpvm/projects/onnx/env.yaml b/hpvm/projects/onnx_frontend/env.yaml
similarity index 100%
rename from hpvm/projects/onnx/env.yaml
rename to hpvm/projects/onnx_frontend/env.yaml
diff --git a/hpvm/projects/onnx/frontend/__init__.py b/hpvm/projects/onnx_frontend/frontend/__init__.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/__init__.py
rename to hpvm/projects/onnx_frontend/frontend/__init__.py
diff --git a/hpvm/projects/onnx/frontend/codegen_hpvm.py b/hpvm/projects/onnx_frontend/frontend/codegen_hpvm.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/codegen_hpvm.py
rename to hpvm/projects/onnx_frontend/frontend/codegen_hpvm.py
diff --git a/hpvm/projects/onnx/frontend/codegen_tensor.py b/hpvm/projects/onnx_frontend/frontend/codegen_tensor.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/codegen_tensor.py
rename to hpvm/projects/onnx_frontend/frontend/codegen_tensor.py
diff --git a/hpvm/projects/onnx/frontend/graph_builder.py b/hpvm/projects/onnx_frontend/frontend/graph_builder.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/graph_builder.py
rename to hpvm/projects/onnx_frontend/frontend/graph_builder.py
diff --git a/hpvm/projects/onnx/frontend/graph_ir.py b/hpvm/projects/onnx_frontend/frontend/graph_ir.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/graph_ir.py
rename to hpvm/projects/onnx_frontend/frontend/graph_ir.py
diff --git a/hpvm/projects/onnx/frontend/main.py b/hpvm/projects/onnx_frontend/frontend/main.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/main.py
rename to hpvm/projects/onnx_frontend/frontend/main.py
diff --git a/hpvm/projects/onnx/frontend/onnx_attr.py b/hpvm/projects/onnx_frontend/frontend/onnx_attr.py
similarity index 100%
rename from hpvm/projects/onnx/frontend/onnx_attr.py
rename to hpvm/projects/onnx_frontend/frontend/onnx_attr.py
diff --git a/hpvm/projects/onnx/frontend/template_hpvm.cpp b/hpvm/projects/onnx_frontend/frontend/template_hpvm.cpp
similarity index 100%
rename from hpvm/projects/onnx/frontend/template_hpvm.cpp
rename to hpvm/projects/onnx_frontend/frontend/template_hpvm.cpp
diff --git a/hpvm/projects/onnx/frontend/template_tensor.cpp b/hpvm/projects/onnx_frontend/frontend/template_tensor.cpp
similarity index 100%
rename from hpvm/projects/onnx/frontend/template_tensor.cpp
rename to hpvm/projects/onnx_frontend/frontend/template_tensor.cpp
diff --git a/hpvm/projects/onnx/setup.py b/hpvm/projects/onnx_frontend/setup.py
similarity index 100%
rename from hpvm/projects/onnx/setup.py
rename to hpvm/projects/onnx_frontend/setup.py
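For reference, the `vgg16_keras.ipynb` and `vgg16_onnx.ipynb` notebooks deleted above followed one common pattern: build or load a Keras model, export it to ONNX with `keras2onnx` at opset 10, and check that `onnxruntime` reproduces the Keras predictions. The sketch below reconstructs that flow in minimal form; it is not the removed code itself. The tiny stand-in model, the output file name, and the batch of 100 test images are illustrative placeholders, and the sketch assumes the same TensorFlow-backend Keras / keras2onnx / onnxruntime environment described in the `onnx_frontend` README.

```python
# Minimal sketch of the export-and-verify flow from the deleted
# vgg16_keras.ipynb / vgg16_onnx.ipynb notebooks. The model below is a tiny
# stand-in for the trained cifar10vgg network; the file name and batch size
# are placeholders.
import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers import Flatten, Dense, Activation
from keras.datasets import cifar10
import keras2onnx
import onnx
import onnxruntime

K.set_image_data_format('channels_first')   # NCHW layout, as in the notebooks

(_, _), (x_test, _) = cifar10.load_data()   # (N, 3, 32, 32) with channels_first
x_test = x_test.astype('float32')[:100]

# Stand-in for cifar10vgg().model from the removed notebook.
model = Sequential([
    Flatten(input_shape=(3, 32, 32)),
    Dense(10),
    Activation('softmax'),
])

# Export to ONNX at opset 10, as the notebooks did, then reload the file.
onnx_model = keras2onnx.convert_keras(model, model.name, target_opset=10)
onnx.save(onnx_model, 'standin_cifar10.onnx')
sess = onnxruntime.InferenceSession('standin_cifar10.onnx')
input_name = sess.get_inputs()[0].name
output_name = sess.get_outputs()[0].name

# Compare Keras and onnxruntime outputs; the notebooks used decimal=1 here.
keras_out = model.predict(x_test)
ort_out = sess.run([output_name], {input_name: x_test})[0]
np.testing.assert_almost_equal(ort_out, keras_out, decimal=1)
print("onnxruntime output matches Keras")
```

The removed `vgg16_tvm.ipynb` additionally compiled the same ONNX file with TVM Relay (`relay.frontend.from_onnx`, `relay.build` with target `cuda -libs=cudnn,cublas`) and ran it through `graph_runtime`; those API names correspond to the TVM release pinned by the notebooks and may differ in newer TVM versions.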