diff --git a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp index 24ba749cb57953cfec2985ef47c37282bf6f0f93..e1634be3a84b27de82eacc437473c9024cf878c7 100644 --- a/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp +++ b/hpvm/projects/hpvm-tensor-rt/tensor_runtime/src/hpvm-rt-controller.cpp @@ -1422,13 +1422,14 @@ hpvm_rt_readLabelsBatch_cached(const char *labels_file, int start, int end) { ERROR("Data file %s is not found. Aborting...\n", labels_file); abort(); } + // Get number of labels fseek(file, 0, SEEK_END); long size = ftell(file); fseek(file, 0, SEEK_SET); // return file pointer to beginning // Allocate memory for labels - labels_from_file = (uint32_t *)malloc(size); + labels_from_file = (uint32_t *) malloc(size); if (labels_from_file == NULL) { ERROR("Memory allocation for labels unsucessfull. Aborting...\n"); abort(); @@ -1506,9 +1507,9 @@ float hpvm_rt_computeAccuracy3(uint32_t *labels, void *result_ptr) { } -//#define llvm_hpvm_invokeRtControl_BASE llvm_hpvm_invokeRtControl +#define llvm_hpvm_invokeRtControl_BASE llvm_hpvm_invokeRtControl //#define llvm_hpvm_invokeRtControl_ADJUST_PR llvm_hpvm_invokeRtControl -#define llvm_hpvm_invokeRtControl_ITERATE llvm_hpvm_invokeRtControl +//#define llvm_hpvm_invokeRtControl_ITERATE llvm_hpvm_invokeRtControl extern "C" void llvm_hpvm_invokeRtControl_BASE( void *result, const char *str, int start, int end) { diff --git a/hpvm/projects/keras/README.md b/hpvm/projects/keras/README.md index 1f790fd46da13a489e12974328471e017e24743b..70828896b00ddb2c452b74a2275370b71ec7b5c4 100644 --- a/hpvm/projects/keras/README.md +++ b/hpvm/projects/keras/README.md @@ -1,12 +1,101 @@ +# Keras Frontend -## Importing Conda Environment: +## Installing Dependencies -conda env create -f keras_environment.yml +### Updating pip +The pip version required in this subproject must be >= `19.3`. 
-## Building and Installing Frontend: +To upgrade pip: +``` +pip install --upgrade pip +``` + +To check installed pip version: + +``` +pip -V +``` + +### Importing Conda Environment: + +``` +conda env create -f keras_environment.yml --name ${KERAS_ENV_NAME} +``` +Note: pip version MUST be >= 19.3 + +### Activating Conda Environment: + +``` +conda activate ${KERAS_ENV_NAME} +``` + +### Building and Installing Frontend: + +``` python setup.py build python setup.py install +``` + +## Running Benchmarks + +Benchmarks under `./src/` + +List of benchmarks and the expected accuracies: + +| Benchmark | Accuracy | +| ----------- | ----------- | +| AlexNet-CIFAR10 | 79.16 | +| AlexNet2-CIFAR10 | 85.10 | +| AlexNet-ImageNet | 56.30 | +| LeNet-MNIST | 99.11 | todo: fix broken +| MobileNet-CIFAR10 | 82.40 | +| ResNet18-CIFAR10 | 89.52 | +| ResNet50-ImageNet | 75.10 | +| VGG16-CIFAR10 | 89.42 | +| VGG16-CIFAR100 | 66.20 | +| VGG16-ImageNet | 69.46 | + +Activate conda environment (above) before running benchmarks + +### Synopsis + +``` +python src/${BENCH_NAME}.py [hpvm_reload|keras_reload] [frontend|keras_dump] + +``` + +**Parameters:** + +`hpvm_reload` : Reloads HPVM weights (format used in `model_params` found here: [ADD link to Google Drive]) from directory specified in Benchmark constructor. + +`keras_reload`: Reloads weights in Keras `.h5` file format + +`frontend`: Invokes the HPVM frontend and dumps weights in directory specified in constructor + +`keras_dump`: Dumps keras .h5 format model weights in directory specified in constructor + + + +### Building New Benchmarks + +All benchmarks inherit from the common parent `Benchmark` class. +Each benchmark overrides virtual functions for building the model, training, inference, +and data preprocessing. 
+ + +`def buildModel(self)`: +returns a keras model + +`def data_preprocess(self)`: +returns X_train, y_train, X_test, y_test, X_tuner, and y_tuner data — in that order; this data will be directly used later for training and inference + +`def trainModel(self, model, X_train, y_train, X_test, y_test)`: +returns a trained keras model + + + + diff --git a/hpvm/projects/keras/cmake_template/CMakeLists.txt b/hpvm/projects/keras/cmake_template/CMakeLists.txt deleted file mode 100644 index 0cdee697ce2d663775f0283e96f35c45fd467986..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/cmake_template/CMakeLists.txt +++ /dev/null @@ -1,59 +0,0 @@ -cmake_minimum_required (VERSION 3.17) -project (hpvm-tensor-rt) -find_package(CUDA 6.5 REQUIRED) -set(CUDA_SEPARABLE_COMPILATION ON CACHE BOOL "") -set(CUDA_PROPAGATE_HOST_FLAGS OFF) - -# Addresses a bug where code is not compiled as C++11 in non-CUDA code and older g++ versions -# Edit: using c++14 now -set(CMAKE_CXX_STANDARD 14) -set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -std=c++14 -I/") -set( - CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS}; - -gencode;arch=compute_60,code=sm_60; - -gencode;arch=compute_60,code=compute_60; - -std=c++14 --expt-relaxed-constexpr -maxrregcount 32 # These are for image ops -) -if(CMAKE_BUILD_TYPE STREQUAL "Debug") - message("Debug mode") - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-g;-lineinfo;-Xcompiler;-ggdb) -else() - set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS};-DNDEBUG;-Xcompiler;-DNDEBUG) -endif() - - -# Default include/link directories -include_directories(${CUDA_TOOLKIT_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}/include) -include_directories($ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/include) -include_directories($ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/tensor_runtime/include) -include_directories($ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/dnn_sources/include) -include_directories($ENV{LLVM_SRC_ROOT}/projects/gpu_profiler/include) -include_directories($ENV{LLVM_SRC_ROOT}/projects/soc_simulator/include) 
-link_directories(${CUDA_TOOLKIT_ROOT_DIR}/lib64 $ENV{CUDNN_PATH} $ENV{CUDNN_PATH}/lib $ENV{CUDNN_PATH}/lib64) - - - -# Default link libraries -find_library(GPU_PROFILER_LIB - NAMES libgpu_profiler.a - HINTS $ENV{LLVM_SRC_ROOT}/projects/gpu_profiler/lib - ) - -find_library(SOC_SIMULATOR_LIB - NAMES libpromise_profiler.a - HINTS $ENV{LLVM_SRC_ROOT}/projects/soc_simulator/lib - ) - -find_library(TENSOR_LIB - NAMES libtensor_runtime.a - HINTS $ENV{LLVM_SRC_ROOT}/projects/hpvm-tensor-rt/lib - ) - - -set(LINK_LIBS cudart cudnn cufft cublas stdc++fs curand -pthread) - -#### Image Processing Benchmarks - -add_executable(DNN_binary src.cc) -target_link_libraries(DNN_binary ${TENSOR_LIB} ${GPU_PROFILER_LIB} ${SOC_SIMULATOR_LIB} ${LINK_LIBS}) - diff --git a/hpvm/projects/keras/frontend/approxhpvm_translator.py b/hpvm/projects/keras/frontend/approxhpvm_translator.py index e60d6adb994ee4da56765a1fa365b14a792863d2..59bc0d17c0605672153a542e850d98cf9d868d77 100644 --- a/hpvm/projects/keras/frontend/approxhpvm_translator.py +++ b/hpvm/projects/keras/frontend/approxhpvm_translator.py @@ -5,6 +5,7 @@ from frontend.promise_translator import PromiseRtTranslator from frontend.hpvm_dfg_translator import HPVMTranslator from frontend.weight_utils import dumpLabels, dumpData, dumpConvWeights, dumpFcWeights, dumpFcBias from frontend.utils import * +from frontend.knobs import * import keras import os @@ -205,7 +206,75 @@ class TensorRtTranslator: self.input_str = "" self.filter_names = {} + # Used for Json gen + self.json_str = "" + self.knobs_str = "" + self.cur_height = 32 + self.cur_width = 32 + self.op_count = 1 + + + + + def setInputHeightWidth(self, data): + + self.cur_height = data.shape[2] + self.cur_width = data.shape[3] + DEBUG ("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, ", \n") + + + def addConvOverheads(self, weights, padding, strides): + + K_d = weights.shape[0] * weights.shape[1] * weights.shape[2] * weights.shape[3] + + H_d = self.cur_height / 
strides[0] + W_d = self.cur_width / strides[1] + + flops = H_d * W_d * K_d + DEBUG ("conv_flops = ", flops) + + self.json_str += "\"convolution_" + str(self.op_count) + "\" : " + str(flops) + ", \n" + self.knobs_str += "\"convolution_" + str(self.op_count) + "\" : [" + conv_knobs + "], \n" + self.op_count += 1 + + self.cur_height = self.cur_height / strides[0] + self.cur_width = self.cur_width / strides[1] + + DEBUG ("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n") + + + def addDenseOverheads(self, weights): + + flops = weights.shape[0] * weights.shape[1] + DEBUG ("dense_flops = ", flops) + + self.json_str += "\"linear_" + str(self.op_count) + "\" : " + str(flops) + ", \n" + self.knobs_str += "\"linear_" + str(self.op_count) + "\" : [" + baseline_knobs + "], \n" + self.op_count += 1 + + self.cur_height = 1 + self.cur_width = weights.shape[1] + + DEBUG ("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n") + + + def adjustPoolDims(self, strides): + + self.cur_height = self.cur_height / strides[0] + self.cur_width = self.cur_width / strides[1] + + DEBUG ("cur_height = ", self.cur_height, " cur_width = ", self.cur_width, "\n") + + + def addBaselineKnob(self, op_name): + self.json_str += "\"" + op_name + "_" + str(self.op_count) + "\" : 0, \n" + self.knobs_str += "\"" + op_name + "_" + str(self.op_count) + "\" : [" + baseline_knobs + "], \n" + self.op_count += 1 + + + + def getWeightStr(self): return self.weight_str @@ -393,7 +462,16 @@ class TensorRtTranslator: print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n"); sys.exit(0) - + # NOTE: For Json (tuning config) file generation + if layer_type == "Conv2D": + self.addConvOverheads(weights, padding, strides) + + elif layer_type == "DepthwiseConv2D": + #self.json_str += "depthwise_convolution_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob("depthwise_convolution") + + if layer_type == "Dense": input_var_name = 
self.getSingleInputName(cur_node) @@ -406,6 +484,9 @@ class TensorRtTranslator: self.program_str += inst_str + # Add Cost for Dense Layer (Json file) + self.addDenseOverheads(weights) + if self.hasBiasAdd(cur_node): out_var_name2 = self.getVariableName(cur_node) @@ -420,13 +501,21 @@ class TensorRtTranslator: # NOTE: Changing output variable out_var_name1 = out_var_name2 + #self.json_str += "add_" + str(self.op_count) + " : 0, \n" + # self.op_count += 1 + self.addBaselineKnob("add") + if layer_type == "Activation": input_var_name = self.getSingleInputName(cur_node) inst_str = genActivationCallStr(input_var_name, out_var_name1, cur_node.activation_type) self.program_str += inst_str - + + #self.json_str += cur_node.activation_type + "_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob(cur_node.activation_type) + if self.hasActivation(cur_node) and layer_type != "Activation": activation_type = cur_node.activation_type @@ -438,7 +527,11 @@ class TensorRtTranslator: if activation_type == "softmax": print ("Softmax canNOT be part of Dense/Conv Op. 
Insert: Activation('softmax');") sys.exit(0) - + + #self.json_str += activation_type + "_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob(activation_type) + if layer_type == "BatchNormalization": input_var_name = self.getSingleInputName(cur_node) @@ -453,6 +546,11 @@ class TensorRtTranslator: inst_str += "); \n" self.program_str += inst_str + + #self.json_str += "batchnorm_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob("batchnorm") + if layer_type == "Add": @@ -462,6 +560,10 @@ class TensorRtTranslator: inst_str += "tensorAdd(" + input_vars[0] + ", " + input_vars[1] + "); \n" self.program_str += inst_str + #self.json_str += "add_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob("add") + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": input_var_name = self.getSingleInputName(cur_node) @@ -473,8 +575,16 @@ class TensorRtTranslator: pool_type = 0 if layer_type == "MaxPooling2D": pool_type = "0" + #self.json_str += "maxpool_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob("maxpool") + if layer_type == "AveragePooling2D": - pool_type = "1" + pool_type = "1" + #self.json_str += "avgpool_" + str(self.op_count) + " : 0, \n" + #self.op_count += 1 + self.addBaselineKnob("avgpool") + # tensorPooling(input, pool_type, pool_h, pool_w, v_pad, h_pad, v_stride, h_stride) inst_str = "void* " + out_var_name1 + " = " @@ -483,6 +593,7 @@ class TensorRtTranslator: inst_str += "); \n" self.program_str += inst_str + self.adjustPoolDims(strides) @@ -518,7 +629,7 @@ class TensorRtTranslator: - def dump_weights(self, model, prefix): + def dump_weights(self, model, prefix, reload_weights): layer_count = 0 for i in range(len(model.layers)): @@ -539,7 +650,7 @@ class TensorRtTranslator: W = weights.shape[0] unique_file_name = w_name + ".bin" - dumpConvWeights(prefix + unique_file_name, weights, N, C, H, W) + dumpConvWeights(prefix + 
unique_file_name, weights, N, C, H, W, reload_weights) file_path = w_name + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -566,7 +677,7 @@ class TensorRtTranslator: DEBUG (bias_weights.shape, b_name) unique_file_name = b_name + ".bin" - dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0]) + dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights) file_path = b_name + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -590,7 +701,7 @@ class TensorRtTranslator: W = weights.shape[1] unique_file_name = w_name + ".bin" - dumpFcWeights(prefix + unique_file_name, weights, H, W) + dumpFcWeights(prefix + unique_file_name, weights, H, W, reload_weights) file_path = w_name + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -609,7 +720,7 @@ class TensorRtTranslator: DEBUG (bias_weights.shape, b_name) unique_file_name = b_name + ".bin" - dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0]) + dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0], reload_weights) file_path = b_name + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -629,7 +740,7 @@ class TensorRtTranslator: gamma_id = layer_name + "_gamma" gamma_file_name = gamma_id + ".bin" self.filter_names[gamma_id] = 1 - dumpFcBias(prefix + gamma_file_name, gamma_w, gamma_w.shape[0]) + dumpFcBias(prefix + gamma_file_name, gamma_w, gamma_w.shape[0], reload_weights) file_path = gamma_id + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -644,7 +755,7 @@ class TensorRtTranslator: beta_id = layer_name + "_beta" beta_file_name = beta_id + ".bin" self.filter_names[beta_id] = 1 - dumpFcBias(prefix + beta_file_name, beta_w, beta_w.shape[0]) + dumpFcBias(prefix + beta_file_name, beta_w, beta_w.shape[0], 
reload_weights) file_path = beta_id + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -658,7 +769,7 @@ class TensorRtTranslator: mean_id = layer_name + "_mean" mean_file_name = mean_id + ".bin" self.filter_names[mean_id] = 1 - dumpFcBias(prefix + mean_file_name, mean_w, mean_w.shape[0]) + dumpFcBias(prefix + mean_file_name, mean_w, mean_w.shape[0], reload_weights) file_path = mean_id + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -673,7 +784,7 @@ class TensorRtTranslator: variance_id = layer_name + "_variance" variance_file_name = variance_id + ".bin" self.filter_names[variance_id] = 1 - dumpFcBias(prefix + variance_file_name, variance_w, variance_w.shape[0]) + dumpFcBias(prefix + variance_file_name, variance_w, variance_w.shape[0], reload_weights) file_path = variance_id + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" @@ -728,39 +839,55 @@ class TensorRtTranslator: - def genInputCalls(self, test_data, test_labels, weights_dir): - - dumpData(weights_dir + "input.bin", test_data) + def genInputReadCall(self, input_data, input_name): - N = test_data.shape[0] - C = test_data.shape[1] - H = test_data.shape[2] - W = test_data.shape[3] - - file_path = "input_path" + file_path = input_name + "_path" file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" - file_path_str += "input.bin\"); \n" + file_path_str += input_name + ".bin\"); \n" self.weight_str += file_path_str - - self.input_str += "void* input = readTrainedWeights(" + + N = input_data.shape[0] + C = input_data.shape[1] + H = input_data.shape[2] + W = input_data.shape[3] + + self.input_str += "void* " + input_name + " = readTrainedWeights(" self.input_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + "," self.input_str += str(H) + "," + str(W) + "); \n" + + + def genLabelReadCall(self, labels, labels_name): + + file_path = labels_name 
+ "_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += labels_name + ".bin\"); \n" + self.weight_str += file_path_str + + self.input_str += "uint32_t* " + labels_name + " = readLabels3(" + self.input_str += file_path + ".c_str()," + str(labels.shape[0]) + "); \n" + + + + def genInputCalls(self, test_data, test_labels, tuner_data, tuner_labels, weights_dir, reload_weights): + + dumpData(weights_dir + "test_input.bin", test_data, reload_weights) + self.genInputReadCall(test_data, "test_input") # Adding input to the filter map self.filter_names["input"] = 1 - - dumpLabels(weights_dir + "labels.bin", test_labels) + dumpLabels(weights_dir + "test_labels.bin", test_labels, reload_weights) + self.genLabelReadCall(test_labels, "test_labels") + + dumpData(weights_dir + "tune_input.bin", tuner_data, reload_weights) + self.genInputReadCall(tuner_data, "tune_input") + + dumpLabels(weights_dir + "tune_labels.bin", tuner_labels, reload_weights) + self.genLabelReadCall(tuner_labels, "tune_labels") - file_path = "labels_path" - file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" - file_path_str += "labels.bin\"); \n" - self.weight_str += file_path_str - #self.input_str += "uint8_t* labels = readLabels(" - self.input_str += "uint32_t* labels = readLabels2(" - self.input_str += file_path + ".c_str()," + str(test_labels.shape[0]) + "); \n" + + + def genBatchLoop(self, x_test): @@ -797,12 +924,9 @@ class TensorRtTranslator: last_node = self.dfg.last_node output_var = self.output_map[last_node.layer_name] - #accuracy_call = "\nfloat accuracy = computeAccuracy2(labels, batch_size, " + output_var + "); \n" - #accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, batch_size, " + output_var + "); \n" accuracy_call = "\nfloat accuracy = computeAccuracy3(labels, " + output_var + "); \n" end_loop_str += accuracy_call - #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); " 
end_loop_str += "final_accuracy += accuracy; \n" end_loop_str += "freeBatchMemory(); \n " end_loop_str += "\n}\n\n" @@ -821,20 +945,46 @@ class TensorRtTranslator: f.write(self.program_str) f.close() + + def dumpJsonFile(self, dir_prefix): + + f = open(dir_prefix + "/tuner.json", "w+") + f.write("{ \n\n") + + op_cost_str = " \"op_cost\" : { \n" + op_cost_str += self.json_str[:-3] + #f.write(self.json_str) + op_cost_str += "\n }, \n\n" + f.write(op_cost_str) + + knobs_speedup_str = "\n \"knob_speedup\" : { \n" + for key in knobs_speedups: + knobs_speedup_str += "\"" + str(key) + "\" : " + str(knobs_speedups[key]) + ", \n" + + f.write(knobs_speedup_str[:-3] + "\n}, \n\n") + + + layer_knobs_str = " \"op_knobs\" : { \n" + layer_knobs_str += self.knobs_str[:-3] + layer_knobs_str += " \n\n } \n\n" + f.write(layer_knobs_str) + + f.write("\n\n}") + f.close() + - def translate(self, model, weights_dir, test_data, test_labels): + def translate(self, model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, weights_reload): self.add_header() - #dir_path = "std::string dir_prefix = std::string(\"" + weights_dir + "\"); \n" - dir_path = "std::string dir_prefix = std::string(\"../\"); \n" + dir_path = "std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + std::string(\"" + weights_dir + "\"); \n" self.weight_str += dir_path if test_data is not None: - self.genInputCalls(test_data, test_labels, weights_dir) + self.genInputCalls(test_data, test_labels, tuner_data, tuner_labels, weights_dir, weights_reload) - self.dump_weights(model, weights_dir) + self.dump_weights(model, weights_dir, weights_reload) self.program_str += "\n" + self.weight_str + "\n\n" self.genBatchLoop(test_data) @@ -845,9 +995,10 @@ class TensorRtTranslator: self.add_footer(test_data); - self.generateSourceProgram(weights_dir) + self.generateSourceProgram(src_dir) + + self.dumpJsonFile(src_dir) - @@ -921,21 +1072,27 @@ def getUniquePath(weights_dir): return weights_dir - + + + #***** Top 
level External Function ******* -def translate_to_approxhpvm(model, weights_dir, test_data=None, test_labels=None, - num_classes=10, reload_dir=None): +def translate_to_approxhpvm(model, + weights_dir, src_dir, + test_data, test_labels, + tuner_data, tuner_labels, + batch_size, num_classes=10, + enable_weights_reload = False): - weights_dir = getUniquePath(weights_dir) - os.mkdir(weights_dir) + reload_weights = enable_weights_reload # If set to True, does not dump any weight/input/label files - - if reload_dir is not None: - y_test = keras.utils.to_categorical(test_labels, num_classes) - reloadModelParams(model, reload_dir, test_data, y_test) + if not reload_weights: + weights_dir = getUniquePath(weights_dir) + os.mkdir(weights_dir) - + src_dir = getUniquePath(src_dir) + os.mkdir(src_dir) + dfg = DFG() for i in range(len(model.layers)): layer = model.layers[i] @@ -949,22 +1106,25 @@ def translate_to_approxhpvm(model, weights_dir, test_data=None, test_labels=None DEBUG ("test_data.shape = ", test_data.shape, "\n") DEBUG ("test_labels.shape = ", test_labels.shape, "\n") - tensorRtTranslator = TensorRtTranslator(dfg) - tensorRtTranslator.translate(model, weights_dir, test_data, test_labels) + tensorRtTranslator = TensorRtTranslator(dfg) + tensorRtTranslator.setInputHeightWidth(test_data) + tensorRtTranslator.translate(model, weights_dir, src_dir, test_data, test_labels, tuner_data, tuner_labels, reload_weights) weight_str = tensorRtTranslator.getWeightStr() input_str = tensorRtTranslator.getInputStr() - #promiseRtTranslator = PromiseRtTranslator(dfg, weight_str) - #promiseRtTranslator.translate(model, weights_dir, test_data) - filter_names = tensorRtTranslator.getFilterNames() hpvmTranslator = HPVMTranslator(dfg, weight_str, input_str, filter_names) - hpvmTranslator.translate(model, weights_dir, test_data) + hpvmTranslator.translate(model, src_dir, test_data, tuner_data, batch_size) + if reload_weights: + print ("NOTE: Using existing pretrained weights \n") + else: + 
print ("NOTE: dumping new set of weights \n") + print ("-- Weight Files Under : ", weights_dir) - print ("-- TensorRT src : ", weights_dir + "/src.cc") - print ("-- ApproxHPVM src : ", weights_dir + "approxhpvm_src.cc") + print ("-- TensorRT src : ", src_dir + "/src.cc") + print ("-- ApproxHPVM src : ", src_dir + "approxhpvm_src.cc") return weights_dir diff --git a/hpvm/projects/keras/frontend/config.py b/hpvm/projects/keras/frontend/config.py new file mode 100644 index 0000000000000000000000000000000000000000..72f18e342be19c9e56ecdc7373e8c037d0131f87 --- /dev/null +++ b/hpvm/projects/keras/frontend/config.py @@ -0,0 +1,3 @@ + +# Path Relative to Model Params Directoryx +MODEL_PARAMS_DIR = "../hpvm-tensor-rt/model_params/" diff --git a/hpvm/projects/keras/frontend/hpvm_dfg_translator.py b/hpvm/projects/keras/frontend/hpvm_dfg_translator.py index 65574a98881f010bf7cd67df344517803de8c67c..2c229a0dd2b100cf83882f5640c1d2707c78398d 100644 --- a/hpvm/projects/keras/frontend/hpvm_dfg_translator.py +++ b/hpvm/projects/keras/frontend/hpvm_dfg_translator.py @@ -1,7 +1,8 @@ import sys from frontend.utils import * - +from frontend.hpvm_intrinsics import * + class HPVMTranslator: @@ -19,7 +20,6 @@ class HPVMTranslator: self.file_header_str = "" self.hpvm_node_names = {} - @@ -140,15 +140,15 @@ class HPVMTranslator: header_str = self.genNodeHeader(output_var, 1) inst_str = header_str - func_name = "__visc__tensor_" + func_name = "" if activation_type == "tanh": - func_name += "tanh" + func_name += HPVM_tensor_tanh if activation_type == "relu": - func_name += "relu" + func_name += HPVM_tensor_relu if activation_type == "softmax": - func_name += "softmax" + func_name += HPVM_tensor_softmax inst_str += " void* r = " + func_name + "(t1); \n" footer_str = self.genNodeFooter(2) @@ -166,20 +166,26 @@ class HPVMTranslator: node_header_str += ", " node_header_str += ") { \n" - node_header_str += " __visc__hint(visc::PROMISE_TARGET); \n" - node_header_str += " __visc__attributes(" + 
str(num_params) + ", " + node_header_str += " " + HPVM_hint + "(" + HPVM_layer_hint + "); \n" + node_header_str += " " + HPVM_attributes + "(" + str(num_params) + ", " + for i in range(num_params): node_header_str += "t" + str(i + 1) if i < num_params - 1: node_header_str += ", " - node_header_str += ", 0); \n\n" + node_header_str += ", 0); \n" + + # Adding node.id calls to assign IDs that are used with the runtime (for correct config ordering) + node_header_str += " " + HPVM_node_id + "(" + str(self.counter) + "); \n\n" + return node_header_str def genNodeFooter(self, num_params): - node_footer_str = " __visc__return(" + + node_footer_str = " " + HPVM_return + "(" node_footer_str += str(num_params) + ", " node_footer_str += "r, " node_footer_str += "(size_t) 0); \n" @@ -188,61 +194,60 @@ class HPVMTranslator: return node_footer_str - # NOTE: genHpvmNodeEdges is replaced by genHpvmEdges def genHpvmNodeEdges2(self, hpvm_node_id, input_vars): hpvm_edge_str = "\n void* " + hpvm_node_id + " = " - hpvm_edge_str += "__visc__createNodeND(0, " + hpvm_node_id + "_node); \n\n" - + hpvm_edge_str += HPVM_createNodeND + "(0, " + hpvm_node_id + "_node); \n\n" + it = 0 for input_var_name in input_vars: if input_var_name in self.filter_names: input_index = self.filter_names[input_var_name] index1 = input_index * 2 index2 = index1 + 1 - hpvm_edge_str += " __visc__bindIn(" + hpvm_node_id + ", " + str(index1) + ", " + str(it*2) + ", 0); \n" - hpvm_edge_str += " __visc__bindIn(" + hpvm_node_id + ", " + str(index2) + ", " + str(it*2+1) + ", 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + hpvm_node_id + ", " + str(index1) + ", " + str(it*2) + ", 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + hpvm_node_id + ", " + str(index2) + ", " + str(it*2+1) + ", 0); \n" elif input_var_name in self.hpvm_node_names: - hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + hpvm_node_id + ", 1, 0, " + str(it*2) + ", 0); \n" - hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + 
hpvm_node_id + ", 1, 1, " + str(it*2+1) + ", 0); \n" - + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + hpvm_node_id + ", 1, 0, " + str(it*2) + ", 0); \n" + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + hpvm_node_id + ", 1, 1, " + str(it*2+1) + ", 0); \n" + it += 1 return hpvm_edge_str + - # Fix: replace deprecated genHpvmNodeEdges with genHpvmEdges def genHpvmNodeEdges(self, out_var_name, input_var_name, input_var_name2): DEBUG ("input_var_name2 = ", input_var_name2) DEBUG ("input_var_name = ", input_var_name) hpvm_edge_str = "\n void* " + out_var_name + " = " - hpvm_edge_str += "__visc__createNodeND(0, " + out_var_name + "_node); \n\n" + hpvm_edge_str += HPVM_createNodeND + "(0, " + out_var_name + "_node); \n\n" if input_var_name in self.filter_names: input_index = self.filter_names[input_var_name] index1 = input_index * 2 index2 = index1 + 1 - hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 0, 0); \n" - hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index2) + ", 1, 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index1) + ", 0, 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index2) + ", 1, 0); \n" elif input_var_name in self.hpvm_node_names: - hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 0, 0, 0); \n" - hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 1, 1, 0); \n" + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + out_var_name + ", 1, 0, 0, 0); \n" + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name + ", " + out_var_name + ", 1, 1, 1, 0); \n" if input_var_name2 in self.filter_names: input_index = self.filter_names[input_var_name2] index1 = input_index * 2 index2 = index1 + 1 - hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 2, 0); \n" - hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + 
str(index2) + ", 3, 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index1) + ", 2, 0); \n" + hpvm_edge_str += " " + HPVM_bindIn + "(" + out_var_name + ", " + str(index2) + ", 3, 0); \n" elif input_var_name2 in self.hpvm_node_names: - hpvm_edge_str += " __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 0, 2, 0); \n" - hpvm_edge_str += " __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 1, 3, 0); \n" + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name2 + ", " + out_var_name + ", 1, 0, 2, 0); \n" + hpvm_edge_str += " " + HPVM_edge + "(" + input_var_name2 + ", " + out_var_name + ", 1, 1, 3, 0); \n" return hpvm_edge_str @@ -254,10 +259,9 @@ class HPVMTranslator: header_str = self.genNodeHeader(out_var_name, 2) inst_str = header_str - inst_str += " void *r = __visc__tensor_mul(t1, t2); \n" + inst_str += " void *r = " + HPVM_tensor_mul + "(t1, t2); \n" footer_str = self.genNodeFooter(2) inst_str += footer_str - input_var_name = self.getSingleInputName(cur_node) weight_name = cur_node.layer_name + "_w" @@ -270,7 +274,7 @@ class HPVMTranslator: def genConvNode(self, cur_node): - #input_var_name = self.getSingleInputName(cur_node) + out_var_name = self.getVariableName(cur_node) header_str = self.genNodeHeader(out_var_name, 2) @@ -291,7 +295,7 @@ class HPVMTranslator: # FIXME: currently only supporting symmetric padding padding = prev_padding[0][0] - inst_str += " void *r = __visc__tensor_convolution(t1, t2, " + inst_str += " void *r = " + HPVM_tensor_convolution + "(t1, t2, " inst_str += str(padding) + ", " inst_str += str(padding) + ", " inst_str += str(strides[0]) + ", " @@ -310,7 +314,7 @@ class HPVMTranslator: def genDepthwiseConvNode(self, cur_node): - #input_var_name = self.getSingleInputName(cur_node) + out_var_name = self.getVariableName(cur_node) header_str = self.genNodeHeader(out_var_name, 2) @@ -331,7 +335,7 @@ class HPVMTranslator: # FIXME: currently only supporting symmetric padding padding = 
prev_padding[0][0] - inst_str += " void *r = __visc__tensor_group_convolution(t1, t2, " + inst_str += " void *r = " + HPVM_tensor_group_convolution + "(t1, t2, " inst_str += str(padding) + ", " inst_str += str(padding) + ", " inst_str += str(strides[0]) + ", " @@ -355,13 +359,13 @@ class HPVMTranslator: def genBatchNormNode(self, cur_node): - #input_var_name = self.getSingleInputName(cur_node) + out_var_name = self.getVariableName(cur_node) header_str = self.genNodeHeader(out_var_name, 5) inst_str = header_str - inst_str += " void *r = __visc__tensor_batchnorm(t1, t2, t3, t4, t5, " + inst_str += " void *r = " + HPVM_tensor_batchnorm + "(t1, t2, t3, t4, t5, " inst_str += str(cur_node.epsilon) + "); \n" footer_str = self.genNodeFooter(2) @@ -394,7 +398,7 @@ class HPVMTranslator: header_str = self.genNodeHeader(out_var_name, 2) inst_str = header_str - inst_str += " void *r = __visc__tensor_add(t1, t2); \n" + inst_str += " void *r = " + HPVM_tensor_add + "(t1, t2); \n" footer_str = self.genNodeFooter(2) inst_str += footer_str @@ -434,7 +438,7 @@ class HPVMTranslator: header_str = self.genNodeHeader(out_var_name, 2) inst_str = header_str - inst_str += " void *r = __visc__tensor_add(t1, t2); \n" + inst_str += " void *r = " + HPVM_tensor_add + "(t1, t2); \n" footer_str = self.genNodeFooter(2) inst_str += footer_str @@ -460,9 +464,9 @@ class HPVMTranslator: layer_type = cur_node.layer_type if layer_type == "MaxPooling2D": - func_name = "__visc__tensor_pool_max" + func_name = HPVM_tensor_pool_max if layer_type == "AveragePooling2D": - func_name = "__visc__tensor_pool_mean" + func_name = HPVM_tensor_pool_mean inst_str += " void* r = " + func_name + "(t1, " inst_str += str(pool_size[0]) + ", " + str(pool_size[1]) + ", " @@ -549,7 +553,10 @@ class HPVMTranslator: headers += "#include <sys/stat.h> \n" headers += "#include <cstring> \n" - headers += "#include <visc.h> \n" + headers += "#include <" + HPVM_header + "> \n" + if LLVM_9_BRANCH: + headers += "#include \"config.h\" \n" 
+ headers += "#include <tensorTypes.h> \n" headers += "#include <tensorUtils.h> \n\n" @@ -571,9 +578,10 @@ class HPVMTranslator: index += 1 root_signature += "){ \n\n" - root_signature += "\n __visc__hint(visc::CPU_TARGET); \n" - root_signature += " __visc__attributes(" + str(len(self.filter_names)) + ", " + root_signature += "\n " + HPVM_hint + "(" + HPVM_cpu_hint + "); \n" + root_signature += " " + HPVM_attributes + "(" + str(len(self.filter_names)) + ", " + index = 0 for f_name in self.filter_names: root_signature += f_name @@ -591,8 +599,8 @@ class HPVMTranslator: output_var = self.output_map[last_node.layer_name] # Binding output of last DFG node to the Root Node output - root_footer_str = "\n __visc__bindOut(" + output_var + ", 0, 0, 0); \n" - root_footer_str += " __visc__bindOut(" + output_var + ", 1, 1, 0); \n" + root_footer_str = "\n " + HPVM_bindOut + "(" + output_var + ", 0, 0, 0); \n" + root_footer_str += " " + HPVM_bindOut + "(" + output_var + ", 1, 1, 0); \n" root_footer_str += "\n}\n\n" self.root_str += root_footer_str @@ -618,29 +626,98 @@ class HPVMTranslator: - def genMainFunction(self, test_data): - main_func_str = "int main(){ \n\n" - main_func_str += self.weight_str - main_func_str += self.input_str - main_func_str += "\n__visc__init(); \n" - main_func_str += "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n" + def genBatchLoop(self, test_data, batch_size): - for f_name in self.filter_names: - main_func_str += "args->" + f_name + " = " + f_name + "; \n" - main_func_str += "args->" + f_name + "_bytes = 0; \n" + chans = test_data.shape[1] + width = test_data.shape[2] + height = test_data.shape[3] + test_input_size = test_data.shape[0] - main_func_str += "\nvoid* dfg = __visc__launch(0, root, (void*) args); \n\n" - main_func_str += "__visc__wait(dfg); \n\n" - - main_func_str += "void *result = static_cast<RootIn*>(args)->input; \n" - main_func_str += "hpvm_request_tensor(result, 0); \n\n" - main_func_str += "__visc__cleanup(); \n 
" + func_str = "unsigned int batch_size = " + str(batch_size) + "; \n" + func_str += "unsigned int test_input_size = " + str(test_input_size) + "; \n" + func_str += "unsigned int batch_count = test_input_size / batch_size; \n\n" + + func_str += "startMemTracking(); \n" + func_str += "startProfiling(); \n\n" + + func_str += "for(unsigned int j = 0; j < 1; j++){ \n" + func_str += "for(unsigned int i = 0; i < batch_count; i++){ \n\n" + + func_str += "unsigned int start = i * batch_size; \n" + func_str += "unsigned int end = (i + 1) * batch_size; \n" + + func_str += "void* input = readInputBatch(input_path.c_str(), 0, start, end," + str(chans) + "," + str(width) + "," + str(height) + "); \n\n" - main_func_str += "computeAccuracy3(labels, result); \n" - main_func_str += "return 0; \n\n" - main_func_str += "} \n" + func_str += "args->input = input; \n" + func_str += "args->input_bytes = 0; \n\n" + + return func_str + + + + def endBatchLoop(self): + + func_str = "freeBatchMemory(); \n" + func_str += "} \n" + func_str += "} \n\n" + func_str += "stopProfiling(); \n" + + return func_str + + # FIXIT + def handleTuneTestData(self): + + input_str = "void* input = test_input; \n" + input_str += "std::string input_path = test_input_path; \n" + input_str += "std::string labels_path = test_labels_path; \n\n" + + input_str += "if (argc >= 2 && std::string(argv[1]) == \"tune\"){ \n" + input_str += " input = tune_input; \n" + input_str += " input_path = tune_input_path; \n" + input_str += " labels_path = tune_labels_path; \n\n" + input_str += "} \n\n" + + return input_str + + - self.main_func_str += main_func_str + def genMainFunction(self, test_data, batch_size): + + main_func_str = "int main(int argc, char* argv[]){ \n\n" + main_func_str += self.weight_str + main_func_str += self.input_str + main_func_str += "\n" + HPVM_init + "(); \n" + main_func_str += "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n" + + main_func_str += self.handleTuneTestData() + + for f_name 
in self.filter_names: + main_func_str += "args->" + f_name + " = " + f_name + "; \n" + main_func_str += "args->" + f_name + "_bytes = 0; \n" + + main_func_str += self.genBatchLoop(test_data, batch_size) + + main_func_str += "void* dfg = " + HPVM_launch + "(0, root, (void*) args); \n\n" + main_func_str += HPVM_wait + "(dfg); \n\n" + + if LLVM_4_BRANCH: + main_func_str += "void *result = static_cast<RootIn*>(args)->input; \n" + elif LLVM_9_BRANCH: + main_func_str += "void *result = static_cast<RootIn *>(args)->r.tensor; \n" + + main_func_str += "hpvm_request_tensor(result, 0); \n\n" + main_func_str += "llvm_hpvm_invokeRtControl(result, labels_path.c_str(), start, end); \n" + + main_func_str += self.endBatchLoop() + + main_func_str += HPVM_cleanup + "(); \n " + + ####main_func_str += "computeAccuracy3(labels, result); \n" + main_func_str += "return 0; \n\n" + main_func_str += "} \n" + + self.main_func_str += main_func_str + @@ -658,17 +735,17 @@ class HPVMTranslator: - def translate(self, model, weights_dir, test_data): + def translate(self, model, src_dir, test_data, tuner_data, batch_size): self.genFileHeader() self.genRootNodeHeader() self.genRootStructure() self.codegen(self.dfg) self.genRootNodeFooter() - self.genMainFunction(test_data) + self.genMainFunction(test_data, batch_size) # dump generated program string to source file - self.generateSourceProgram(weights_dir) + self.generateSourceProgram(src_dir) diff --git a/hpvm/projects/keras/frontend/hpvm_intrinsics.py b/hpvm/projects/keras/frontend/hpvm_intrinsics.py new file mode 100644 index 0000000000000000000000000000000000000000..1060d965234bab61e9a97559a367d7902baa02d5 --- /dev/null +++ b/hpvm/projects/keras/frontend/hpvm_intrinsics.py @@ -0,0 +1,70 @@ + +LLVM_4_BRANCH = False +LLVM_9_BRANCH = not LLVM_4_BRANCH + +if LLVM_4_BRANCH: + + HPVM_header = "visc.h" + + HPVM_hint = "__visc__hint" + HPVM_attributes = "__visc__attributes" + HPVM_node_id = "__visc__node_id" + HPVM_layer_hint = "visc::PROMISE_TARGET" + 
HPVM_cpu_hint = "visc::CPU_TARGET" + + HPVM_init = "__visc__init" + HPVM_cleanup = "__visc__cleanup" + HPVM_launch = "__visc__launch" + HPVM_wait = "__visc__wait" + + HPVM_tensor_convolution = "__visc__tensor_convolution" + HPVM_tensor_group_convolution = "__visc__tensor_group_convolution" + HPVM_tensor_add = "__visc__tensor_add" + HPVM_tensor_mul = "__visc__tensor_mul" + HPVM_tensor_batchnorm = "__visc__tensor_batchnorm" + HPVM_tensor_pool_max = "__visc__tensor_pool_max" + HPVM_tensor_pool_mean = "__visc__tensor_pool_mean" + HPVM_tensor_tanh = "__visc__tensor_tanh" + HPVM_tensor_relu = "__visc__tensor_relu" + HPVM_tensor_softmax = "__visc__tensor_softmax" + + HPVM_createNodeND = "__visc__createNodeND" + HPVM_bindIn = "__visc__bindIn" + HPVM_bindOut = "__visc__bindOut" + HPVM_edge = "__visc__edge" + HPVM_return = "__visc__return" + + +elif LLVM_9_BRANCH: + + HPVM_header = "hpvm.h" + + HPVM_hint = "__hpvm__hint" + HPVM_attributes = "__hpvm__attributes" + HPVM_node_id = "__hpvm__node_id" + HPVM_layer_hint = "hpvm::TENSOR_TARGET" + HPVM_cpu_hint = "hpvm::CPU_TARGET" + + HPVM_init = "__hpvm__init" + HPVM_cleanup = "__hpvm__cleanup" + HPVM_launch = "__hpvm__launch" + HPVM_wait = "__hpvm__wait" + + HPVM_tensor_convolution = "__hpvm__tensor_convolution" + HPVM_tensor_group_convolution = "__hpvm__tensor_group_convolution" + HPVM_tensor_add = "__hpvm__tensor_add" + HPVM_tensor_mul = "__hpvm__tensor_mul" + HPVM_tensor_batchnorm = "__hpvm__tensor_batchnorm" + HPVM_tensor_pool_max = "__hpvm__tensor_pool_max" + HPVM_tensor_pool_mean = "__hpvm__tensor_pool_mean" + HPVM_tensor_tanh = "__hpvm__tensor_tanh" + HPVM_tensor_relu = "__hpvm__tensor_relu" + HPVM_tensor_softmax = "__hpvm__tensor_softmax" + + HPVM_createNodeND = "__hpvm__createNodeND" + HPVM_bindIn = "__hpvm__bindIn" + HPVM_bindOut = "__hpvm__bindOut" + HPVM_edge = "__hpvm__edge" + HPVM_return = "__hpvm__return" + + diff --git a/hpvm/projects/keras/frontend/knobs.py b/hpvm/projects/keras/frontend/knobs.py new file mode 
100644 index 0000000000000000000000000000000000000000..291221acb544dbcdf88c810b9401356d2da91be7 --- /dev/null +++ b/hpvm/projects/keras/frontend/knobs.py @@ -0,0 +1,38 @@ + +knobs_speedups = {} +knobs_speedups[11] = 1 +knobs_speedups[12] = 1.5 +knobs_speedups[151] = 3 +knobs_speedups[152] = 3 +knobs_speedups[153] = 3 +knobs_speedups[154] = 3 +knobs_speedups[155] = 2.25 +knobs_speedups[156] = 2.25 +knobs_speedups[157] = 2.25 +knobs_speedups[158] = 2.25 +knobs_speedups[159] = 2.25 +knobs_speedups[160] = 2.25 +knobs_speedups[161] = 2 +knobs_speedups[162] = 2 +knobs_speedups[163] = 2 +knobs_speedups[164] = 2 +knobs_speedups[165] = 2 +knobs_speedups[166] = 2 +knobs_speedups[167] = 2 +knobs_speedups[168] = 2 +knobs_speedups[261] = 3 +knobs_speedups[262] = 3 +knobs_speedups[263] = 2.25 +knobs_speedups[264] = 2.25 +knobs_speedups[265] = 2.25 +knobs_speedups[266] = 2 +knobs_speedups[267] = 2 +knobs_speedups[268] = 2 +knobs_speedups[269] = 2 + + +conv_knobs = "12, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 261, 262, 263, 264, 265, 266, 267, 268, 269" + +baseline_knobs = "12" + + diff --git a/hpvm/projects/keras/frontend/weight_utils.py b/hpvm/projects/keras/frontend/weight_utils.py index dd22765386e2172572ad0feec201c7dec407a909..83a13c81351bf621a5f7be41ebe8f67bfdf6c3d4 100644 --- a/hpvm/projects/keras/frontend/weight_utils.py +++ b/hpvm/projects/keras/frontend/weight_utils.py @@ -1,12 +1,17 @@ +import sys import numpy as np import struct import random from keras.optimizers import Adam -def dumpLabels(file_name, Y_test): +def dumpLabels(file_name, Y_test, reload_weights = False): + + if reload_weights: + return + print ("Dumping Labels File = ", file_name) f = open(file_name, "wb") @@ -27,38 +32,16 @@ def dumpLabels(file_name, Y_test): f.close() -""" -def dumpData(file_name, X_test): - - N = X_test.shape[0] - C = X_test.shape[1] - H = X_test.shape[2] - W = X_test.shape[3] - - print ("*DumpData") - #print("-min_val = ", 
np.amin(X_test)) - #print("-max_val = ", np.amax(X_test)) - - f = open(file_name, "wb") - for i in range(N): - for j in range(C): - for k in range(H): - for l in range(W): - val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l])) - f.write(np.float32(val[0])) - - f.close() - -""" -def dumpData(file_name, X_test): +def dumpData(file_name, X_test, reload_weights = False): + if reload_weights: + return + print ("*Dumping Input File = ", file_name) - #print("-min_val = ", np.amin(X_test)) - #print("-max_val = ", np.amax(X_test)) - + f = open(file_name, "wb") X_test = X_test.flatten() @@ -69,34 +52,14 @@ def dumpData(file_name, X_test): -""" -def dumpConvWeights(file_name, weights, N, C, H, W): - - print (weights.shape) - print ("*DumpConvWeights") - - #print("-min_val = ", np.amin(weights)) - #print("-max_val = ", np.amax(weights)) - - - f = open(file_name, "wb") - for i in range(N): - for j in range(C): - for k in range(H): - for l in range(W): - f.write(weights[k][l][j][i]) - f.close() - -""" - - -def dumpConvWeights(file_name, X_test, N, C, H, W): +def dumpConvWeights(file_name, X_test, N, C, H, W, reload_weights = False): + if reload_weights: + return + print ("*Dumping Conv Weights to file = ", file_name) - #print("-min_val = ", np.amin(X_test)) - #print("-max_val = ", np.amax(X_test)) - + f = open(file_name, "wb") X_test = np.transpose(X_test, (3, 2, 0, 1)) @@ -109,13 +72,13 @@ def dumpConvWeights(file_name, X_test, N, C, H, W): -def dumpFcWeights(file_name, weights, H, W): +def dumpFcWeights(file_name, weights, H, W, reload_weights = False): + if reload_weights: + return + print ("*Dumping FC weights to = ", file_name) - #print("-min_val = ", np.amin(weights)) - #print("-max_val = ", np.amax(weights)) - f = open(file_name, "wb") for i in range(H): for j in range(W): @@ -125,8 +88,11 @@ def dumpFcWeights(file_name, weights, H, W): -def dumpFcBias(file_name, bias, W): +def dumpFcBias(file_name, bias, W, reload_weights = False): + if reload_weights: + return 
+ print ("*Dump Bias Weights = ", file_name) f = open(file_name, "wb") @@ -171,7 +137,7 @@ def dumpCalibrationData2(file_name, test_data, labels_fname, test_labels): # Loads Existing HPVM FP32 weights -def reloadHPVMWeights(model, reload_dir, output_model, X_test, Y_test): +def reloadHPVMWeights(model, reload_dir, output_model): print ("***** Reloading pre-trained HPVM weights ****") @@ -179,37 +145,51 @@ def reloadHPVMWeights(model, reload_dir, output_model, X_test, Y_test): layer = model.layers[i] layer_name = layer.name #-- print ("*layer_name = ", layer_name) - if "conv" not in layer_name and "dense" not in layer_name: - continue + if "conv" in layer_name or "dense" in layer_name: - w_path = reload_dir + layer_name + "_w.bin" - #-- print ("** w_path = ", w_path) - w_arr = np.fromfile(w_path, dtype='float32') - - b_path = reload_dir + layer_name + "_b.bin" - b_arr = np.fromfile(b_path, dtype='float32') + w_path = reload_dir + layer_name + "_w.bin" + #-- print ("** w_path = ", w_path) + w_arr = np.fromfile(w_path, dtype='float32') - w_shape = layer.get_weights()[0].shape - if "conv" in layer_name: - w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1]) - w_arr = np.reshape(w_arr, w_nchw_shape) - w_arr = np.transpose(w_arr, (2,3,1,0)) + if layer.use_bias: + b_path = reload_dir + layer_name + "_b.bin" + b_arr = np.fromfile(b_path, dtype='float32') - if "dense" in layer_name: - w_arr = np.reshape(w_arr, w_shape) + w_shape = layer.get_weights()[0].shape + if "conv" in layer_name: + w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1]) + w_arr = np.reshape(w_arr, w_nchw_shape) + w_arr = np.transpose(w_arr, (2,3,1,0)) - weights = [] - weights.append(w_arr) - weights.append(b_arr) - - # Overriding model weights - layer.set_weights(weights) + if "dense" in layer_name: + w_arr = np.reshape(w_arr, w_shape) + + if layer.use_bias: + weights = [w_arr, b_arr] + else: + weights = [w_arr] + + layer.set_weights(weights) + + elif "batch_normalization" in 
layer_name: + beta_path = reload_dir + layer_name + "_beta.bin" + gamma_path = reload_dir + layer_name + "_gamma.bin" + mean_path = reload_dir + layer_name + "_mean.bin" + variance_path = reload_dir + layer_name + "_variance.bin" + + beta = np.fromfile(beta_path, dtype='float32') + gamma = np.fromfile(gamma_path, dtype='float32') + mean = np.fromfile(mean_path, dtype='float32') + variance = np.fromfile(variance_path, dtype='float32') + + weights = [gamma, beta, mean, variance] + + layer.set_weights(weights) + # Model recompilation needed after resetting weights model.compile(loss='categorical_crossentropy', optimizer=Adam(lr=0.0001, decay=1e-6), metrics=['accuracy']) - model.save(output_model) - return model diff --git a/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc b/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc deleted file mode 100644 index 2bf9bad6b967698e34af99cd128fee8fa206bd5e..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/legacy/generated/vgg16/approxhpvm_src.cc +++ /dev/null @@ -1,982 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/stat.h> -#include <cstring> -#include <visc.h> -#include <tensorTypes.h> -#include <tensorUtils.h> - -void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_1_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_2_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_3_node(void* t1, size_t bytes_t1, void* t2, size_t 
bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_4_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_5_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_6_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_7_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_8_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_9_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_10_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_11_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_12_node(void* 
t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_13_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_14_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_15_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_16_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_17_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_18_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_19_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_20_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void 
var_21_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_22_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_23_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_24_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_25_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_26_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_27_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_28_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_29_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void 
var_30_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_31_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_32_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_33_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_34_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_35_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_36_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_37_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_38_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - 
__visc__return(2, r, (size_t) 0); -} - -void var_39_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_40_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1); - __visc__return(2, r, (size_t) 0); -} - -void var_41_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_42_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_43_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_pool_max(t1, 2, 2, 0, 0, 2, 2); - __visc__return(2, r, (size_t) 0); -} - -void var_44_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_45_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_46_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_relu(t1); - __visc__return(2, r, (size_t) 0); -} - -void var_47_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_mul(t1, t2); - __visc__return(2, r, (size_t) 
0); -} - -void var_48_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(2, t1, t2, 0); - - void *r = __visc__tensor_add(t1, t2); - __visc__return(2, r, (size_t) 0); -} - -void var_49_node(void* t1, size_t bytes_t1) { - __visc__hint(visc::CUDNN_TARGET); - __visc__attributes(1, t1, 0); - - void* r = __visc__tensor_softmax(t1); - __visc__return(2, r, (size_t) 0); -} - -void root(void* input, size_t input_bytes, - void* conv2d_1_w, size_t conv2d_1_w_bytes, - void* conv2d_1_b, size_t conv2d_1_b_bytes, - void* conv2d_2_w, size_t conv2d_2_w_bytes, - void* conv2d_2_b, size_t conv2d_2_b_bytes, - void* conv2d_3_w, size_t conv2d_3_w_bytes, - void* conv2d_3_b, size_t conv2d_3_b_bytes, - void* conv2d_4_w, size_t conv2d_4_w_bytes, - void* conv2d_4_b, size_t conv2d_4_b_bytes, - void* conv2d_5_w, size_t conv2d_5_w_bytes, - void* conv2d_5_b, size_t conv2d_5_b_bytes, - void* conv2d_6_w, size_t conv2d_6_w_bytes, - void* conv2d_6_b, size_t conv2d_6_b_bytes, - void* conv2d_7_w, size_t conv2d_7_w_bytes, - void* conv2d_7_b, size_t conv2d_7_b_bytes, - void* conv2d_8_w, size_t conv2d_8_w_bytes, - void* conv2d_8_b, size_t conv2d_8_b_bytes, - void* conv2d_9_w, size_t conv2d_9_w_bytes, - void* conv2d_9_b, size_t conv2d_9_b_bytes, - void* conv2d_10_w, size_t conv2d_10_w_bytes, - void* conv2d_10_b, size_t conv2d_10_b_bytes, - void* conv2d_11_w, size_t conv2d_11_w_bytes, - void* conv2d_11_b, size_t conv2d_11_b_bytes, - void* conv2d_12_w, size_t conv2d_12_w_bytes, - void* conv2d_12_b, size_t conv2d_12_b_bytes, - void* conv2d_13_w, size_t conv2d_13_w_bytes, - void* conv2d_13_b, size_t conv2d_13_b_bytes, - void* dense_1_w, size_t dense_1_w_bytes, - void* dense_1_b, size_t dense_1_b_bytes, - void* dense_2_w, size_t dense_2_w_bytes, - void* dense_2_b, size_t dense_2_b_bytes){ - - - __visc__hint(visc::CPU_TARGET); - __visc__attributes(31, input, conv2d_1_w, conv2d_1_b, conv2d_2_w, conv2d_2_b, conv2d_3_w, conv2d_3_b, conv2d_4_w, 
conv2d_4_b, conv2d_5_w, conv2d_5_b, conv2d_6_w, conv2d_6_b, conv2d_7_w, conv2d_7_b, conv2d_8_w, conv2d_8_b, conv2d_9_w, conv2d_9_b, conv2d_10_w, conv2d_10_b, conv2d_11_w, conv2d_11_b, conv2d_12_w, conv2d_12_b, conv2d_13_w, conv2d_13_b, dense_1_w, dense_1_b, dense_2_w, dense_2_b, 0); - - - void* var_0 = __visc__createNodeND(0, var_0_node); - - __visc__bindIn(var_0, 0, 0, 0); - __visc__bindIn(var_0, 1, 1, 0); - __visc__bindIn(var_0, 2, 2, 0); - __visc__bindIn(var_0, 3, 3, 0); - - void* var_1 = __visc__createNodeND(0, var_1_node); - - __visc__edge(var_0, var_1, 1, 0, 0, 0); - __visc__edge(var_0, var_1, 1, 1, 1, 0); - __visc__bindIn(var_1, 4, 2, 0); - __visc__bindIn(var_1, 5, 3, 0); - - void* var_2 = __visc__createNodeND(0, var_2_node); - - __visc__edge(var_1, var_2, 1, 0, 0, 0); - __visc__edge(var_1, var_2, 1, 1, 1, 0); - - void* var_3 = __visc__createNodeND(0, var_3_node); - - __visc__edge(var_2, var_3, 1, 0, 0, 0); - __visc__edge(var_2, var_3, 1, 1, 1, 0); - __visc__bindIn(var_3, 6, 2, 0); - __visc__bindIn(var_3, 7, 3, 0); - - void* var_4 = __visc__createNodeND(0, var_4_node); - - __visc__edge(var_3, var_4, 1, 0, 0, 0); - __visc__edge(var_3, var_4, 1, 1, 1, 0); - __visc__bindIn(var_4, 8, 2, 0); - __visc__bindIn(var_4, 9, 3, 0); - - void* var_5 = __visc__createNodeND(0, var_5_node); - - __visc__edge(var_4, var_5, 1, 0, 0, 0); - __visc__edge(var_4, var_5, 1, 1, 1, 0); - - void* var_6 = __visc__createNodeND(0, var_6_node); - - __visc__edge(var_5, var_6, 1, 0, 0, 0); - __visc__edge(var_5, var_6, 1, 1, 1, 0); - - void* var_7 = __visc__createNodeND(0, var_7_node); - - __visc__edge(var_6, var_7, 1, 0, 0, 0); - __visc__edge(var_6, var_7, 1, 1, 1, 0); - __visc__bindIn(var_7, 10, 2, 0); - __visc__bindIn(var_7, 11, 3, 0); - - void* var_8 = __visc__createNodeND(0, var_8_node); - - __visc__edge(var_7, var_8, 1, 0, 0, 0); - __visc__edge(var_7, var_8, 1, 1, 1, 0); - __visc__bindIn(var_8, 12, 2, 0); - __visc__bindIn(var_8, 13, 3, 0); - - void* var_9 = __visc__createNodeND(0, 
var_9_node); - - __visc__edge(var_8, var_9, 1, 0, 0, 0); - __visc__edge(var_8, var_9, 1, 1, 1, 0); - - void* var_10 = __visc__createNodeND(0, var_10_node); - - __visc__edge(var_9, var_10, 1, 0, 0, 0); - __visc__edge(var_9, var_10, 1, 1, 1, 0); - __visc__bindIn(var_10, 14, 2, 0); - __visc__bindIn(var_10, 15, 3, 0); - - void* var_11 = __visc__createNodeND(0, var_11_node); - - __visc__edge(var_10, var_11, 1, 0, 0, 0); - __visc__edge(var_10, var_11, 1, 1, 1, 0); - __visc__bindIn(var_11, 16, 2, 0); - __visc__bindIn(var_11, 17, 3, 0); - - void* var_12 = __visc__createNodeND(0, var_12_node); - - __visc__edge(var_11, var_12, 1, 0, 0, 0); - __visc__edge(var_11, var_12, 1, 1, 1, 0); - - void* var_13 = __visc__createNodeND(0, var_13_node); - - __visc__edge(var_12, var_13, 1, 0, 0, 0); - __visc__edge(var_12, var_13, 1, 1, 1, 0); - - void* var_14 = __visc__createNodeND(0, var_14_node); - - __visc__edge(var_13, var_14, 1, 0, 0, 0); - __visc__edge(var_13, var_14, 1, 1, 1, 0); - __visc__bindIn(var_14, 18, 2, 0); - __visc__bindIn(var_14, 19, 3, 0); - - void* var_15 = __visc__createNodeND(0, var_15_node); - - __visc__edge(var_14, var_15, 1, 0, 0, 0); - __visc__edge(var_14, var_15, 1, 1, 1, 0); - __visc__bindIn(var_15, 20, 2, 0); - __visc__bindIn(var_15, 21, 3, 0); - - void* var_16 = __visc__createNodeND(0, var_16_node); - - __visc__edge(var_15, var_16, 1, 0, 0, 0); - __visc__edge(var_15, var_16, 1, 1, 1, 0); - - void* var_17 = __visc__createNodeND(0, var_17_node); - - __visc__edge(var_16, var_17, 1, 0, 0, 0); - __visc__edge(var_16, var_17, 1, 1, 1, 0); - __visc__bindIn(var_17, 22, 2, 0); - __visc__bindIn(var_17, 23, 3, 0); - - void* var_18 = __visc__createNodeND(0, var_18_node); - - __visc__edge(var_17, var_18, 1, 0, 0, 0); - __visc__edge(var_17, var_18, 1, 1, 1, 0); - __visc__bindIn(var_18, 24, 2, 0); - __visc__bindIn(var_18, 25, 3, 0); - - void* var_19 = __visc__createNodeND(0, var_19_node); - - __visc__edge(var_18, var_19, 1, 0, 0, 0); - __visc__edge(var_18, var_19, 1, 1, 1, 0); 
- - void* var_20 = __visc__createNodeND(0, var_20_node); - - __visc__edge(var_19, var_20, 1, 0, 0, 0); - __visc__edge(var_19, var_20, 1, 1, 1, 0); - __visc__bindIn(var_20, 26, 2, 0); - __visc__bindIn(var_20, 27, 3, 0); - - void* var_21 = __visc__createNodeND(0, var_21_node); - - __visc__edge(var_20, var_21, 1, 0, 0, 0); - __visc__edge(var_20, var_21, 1, 1, 1, 0); - __visc__bindIn(var_21, 28, 2, 0); - __visc__bindIn(var_21, 29, 3, 0); - - void* var_22 = __visc__createNodeND(0, var_22_node); - - __visc__edge(var_21, var_22, 1, 0, 0, 0); - __visc__edge(var_21, var_22, 1, 1, 1, 0); - - void* var_23 = __visc__createNodeND(0, var_23_node); - - __visc__edge(var_22, var_23, 1, 0, 0, 0); - __visc__edge(var_22, var_23, 1, 1, 1, 0); - - void* var_24 = __visc__createNodeND(0, var_24_node); - - __visc__edge(var_23, var_24, 1, 0, 0, 0); - __visc__edge(var_23, var_24, 1, 1, 1, 0); - __visc__bindIn(var_24, 30, 2, 0); - __visc__bindIn(var_24, 31, 3, 0); - - void* var_25 = __visc__createNodeND(0, var_25_node); - - __visc__edge(var_24, var_25, 1, 0, 0, 0); - __visc__edge(var_24, var_25, 1, 1, 1, 0); - __visc__bindIn(var_25, 32, 2, 0); - __visc__bindIn(var_25, 33, 3, 0); - - void* var_26 = __visc__createNodeND(0, var_26_node); - - __visc__edge(var_25, var_26, 1, 0, 0, 0); - __visc__edge(var_25, var_26, 1, 1, 1, 0); - - void* var_27 = __visc__createNodeND(0, var_27_node); - - __visc__edge(var_26, var_27, 1, 0, 0, 0); - __visc__edge(var_26, var_27, 1, 1, 1, 0); - __visc__bindIn(var_27, 34, 2, 0); - __visc__bindIn(var_27, 35, 3, 0); - - void* var_28 = __visc__createNodeND(0, var_28_node); - - __visc__edge(var_27, var_28, 1, 0, 0, 0); - __visc__edge(var_27, var_28, 1, 1, 1, 0); - __visc__bindIn(var_28, 36, 2, 0); - __visc__bindIn(var_28, 37, 3, 0); - - void* var_29 = __visc__createNodeND(0, var_29_node); - - __visc__edge(var_28, var_29, 1, 0, 0, 0); - __visc__edge(var_28, var_29, 1, 1, 1, 0); - - void* var_30 = __visc__createNodeND(0, var_30_node); - - __visc__edge(var_29, var_30, 1, 0, 
0, 0); - __visc__edge(var_29, var_30, 1, 1, 1, 0); - __visc__bindIn(var_30, 38, 2, 0); - __visc__bindIn(var_30, 39, 3, 0); - - void* var_31 = __visc__createNodeND(0, var_31_node); - - __visc__edge(var_30, var_31, 1, 0, 0, 0); - __visc__edge(var_30, var_31, 1, 1, 1, 0); - __visc__bindIn(var_31, 40, 2, 0); - __visc__bindIn(var_31, 41, 3, 0); - - void* var_32 = __visc__createNodeND(0, var_32_node); - - __visc__edge(var_31, var_32, 1, 0, 0, 0); - __visc__edge(var_31, var_32, 1, 1, 1, 0); - - void* var_33 = __visc__createNodeND(0, var_33_node); - - __visc__edge(var_32, var_33, 1, 0, 0, 0); - __visc__edge(var_32, var_33, 1, 1, 1, 0); - - void* var_34 = __visc__createNodeND(0, var_34_node); - - __visc__edge(var_33, var_34, 1, 0, 0, 0); - __visc__edge(var_33, var_34, 1, 1, 1, 0); - __visc__bindIn(var_34, 42, 2, 0); - __visc__bindIn(var_34, 43, 3, 0); - - void* var_35 = __visc__createNodeND(0, var_35_node); - - __visc__edge(var_34, var_35, 1, 0, 0, 0); - __visc__edge(var_34, var_35, 1, 1, 1, 0); - __visc__bindIn(var_35, 44, 2, 0); - __visc__bindIn(var_35, 45, 3, 0); - - void* var_36 = __visc__createNodeND(0, var_36_node); - - __visc__edge(var_35, var_36, 1, 0, 0, 0); - __visc__edge(var_35, var_36, 1, 1, 1, 0); - - void* var_37 = __visc__createNodeND(0, var_37_node); - - __visc__edge(var_36, var_37, 1, 0, 0, 0); - __visc__edge(var_36, var_37, 1, 1, 1, 0); - __visc__bindIn(var_37, 46, 2, 0); - __visc__bindIn(var_37, 47, 3, 0); - - void* var_38 = __visc__createNodeND(0, var_38_node); - - __visc__edge(var_37, var_38, 1, 0, 0, 0); - __visc__edge(var_37, var_38, 1, 1, 1, 0); - __visc__bindIn(var_38, 48, 2, 0); - __visc__bindIn(var_38, 49, 3, 0); - - void* var_39 = __visc__createNodeND(0, var_39_node); - - __visc__edge(var_38, var_39, 1, 0, 0, 0); - __visc__edge(var_38, var_39, 1, 1, 1, 0); - - void* var_40 = __visc__createNodeND(0, var_40_node); - - __visc__edge(var_39, var_40, 1, 0, 0, 0); - __visc__edge(var_39, var_40, 1, 1, 1, 0); - __visc__bindIn(var_40, 50, 2, 0); - 
__visc__bindIn(var_40, 51, 3, 0); - - void* var_41 = __visc__createNodeND(0, var_41_node); - - __visc__edge(var_40, var_41, 1, 0, 0, 0); - __visc__edge(var_40, var_41, 1, 1, 1, 0); - __visc__bindIn(var_41, 52, 2, 0); - __visc__bindIn(var_41, 53, 3, 0); - - void* var_42 = __visc__createNodeND(0, var_42_node); - - __visc__edge(var_41, var_42, 1, 0, 0, 0); - __visc__edge(var_41, var_42, 1, 1, 1, 0); - - void* var_43 = __visc__createNodeND(0, var_43_node); - - __visc__edge(var_42, var_43, 1, 0, 0, 0); - __visc__edge(var_42, var_43, 1, 1, 1, 0); - - void* var_44 = __visc__createNodeND(0, var_44_node); - - __visc__edge(var_43, var_44, 1, 0, 0, 0); - __visc__edge(var_43, var_44, 1, 1, 1, 0); - __visc__bindIn(var_44, 54, 2, 0); - __visc__bindIn(var_44, 55, 3, 0); - - void* var_45 = __visc__createNodeND(0, var_45_node); - - __visc__edge(var_44, var_45, 1, 0, 0, 0); - __visc__edge(var_44, var_45, 1, 1, 1, 0); - __visc__bindIn(var_45, 56, 2, 0); - __visc__bindIn(var_45, 57, 3, 0); - - void* var_46 = __visc__createNodeND(0, var_46_node); - - __visc__edge(var_45, var_46, 1, 0, 0, 0); - __visc__edge(var_45, var_46, 1, 1, 1, 0); - - void* var_47 = __visc__createNodeND(0, var_47_node); - - __visc__edge(var_46, var_47, 1, 0, 0, 0); - __visc__edge(var_46, var_47, 1, 1, 1, 0); - __visc__bindIn(var_47, 58, 2, 0); - __visc__bindIn(var_47, 59, 3, 0); - - void* var_48 = __visc__createNodeND(0, var_48_node); - - __visc__edge(var_47, var_48, 1, 0, 0, 0); - __visc__edge(var_47, var_48, 1, 1, 1, 0); - __visc__bindIn(var_48, 60, 2, 0); - __visc__bindIn(var_48, 61, 3, 0); - - void* var_49 = __visc__createNodeND(0, var_49_node); - - __visc__edge(var_48, var_49, 1, 0, 0, 0); - __visc__edge(var_48, var_49, 1, 1, 1, 0); - - __visc__bindOut(var_49, 0, 0, 0); - __visc__bindOut(var_49, 1, 1, 0); - -} - -struct ret_t { - void* tensor; - size_t bytes; -}; - -typedef struct __attribute__((__packed__)) { - void* input; - size_t input_bytes; - void* conv2d_1_w; - size_t conv2d_1_w_bytes; - void* 
conv2d_1_b; - size_t conv2d_1_b_bytes; - void* conv2d_2_w; - size_t conv2d_2_w_bytes; - void* conv2d_2_b; - size_t conv2d_2_b_bytes; - void* conv2d_3_w; - size_t conv2d_3_w_bytes; - void* conv2d_3_b; - size_t conv2d_3_b_bytes; - void* conv2d_4_w; - size_t conv2d_4_w_bytes; - void* conv2d_4_b; - size_t conv2d_4_b_bytes; - void* conv2d_5_w; - size_t conv2d_5_w_bytes; - void* conv2d_5_b; - size_t conv2d_5_b_bytes; - void* conv2d_6_w; - size_t conv2d_6_w_bytes; - void* conv2d_6_b; - size_t conv2d_6_b_bytes; - void* conv2d_7_w; - size_t conv2d_7_w_bytes; - void* conv2d_7_b; - size_t conv2d_7_b_bytes; - void* conv2d_8_w; - size_t conv2d_8_w_bytes; - void* conv2d_8_b; - size_t conv2d_8_b_bytes; - void* conv2d_9_w; - size_t conv2d_9_w_bytes; - void* conv2d_9_b; - size_t conv2d_9_b_bytes; - void* conv2d_10_w; - size_t conv2d_10_w_bytes; - void* conv2d_10_b; - size_t conv2d_10_b_bytes; - void* conv2d_11_w; - size_t conv2d_11_w_bytes; - void* conv2d_11_b; - size_t conv2d_11_b_bytes; - void* conv2d_12_w; - size_t conv2d_12_w_bytes; - void* conv2d_12_b; - size_t conv2d_12_b_bytes; - void* conv2d_13_w; - size_t conv2d_13_w_bytes; - void* conv2d_13_b; - size_t conv2d_13_b_bytes; - void* dense_1_w; - size_t dense_1_w_bytes; - void* dense_1_b; - size_t dense_1_b_bytes; - void* dense_2_w; - size_t dense_2_w_bytes; - void* dense_2_b; - size_t dense_2_b_bytes; - - struct ret_t r; -} -RootIn; - -int main(){ - -std::string dir_prefix = std::string("data/vgg16_cifar10/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* 
conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* 
conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); -std::string dense_2_w_path = dir_prefix + 
std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); -void* input = readTrainedWeights(input_path.c_str(), 0,10000,3,32,32); -uint8_t* labels = readLabels(labels_path.c_str(),10000); - -__visc__init(); -RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); - -args->input = input; -args->input_bytes = 0; -args->conv2d_1_w = conv2d_1_w; -args->conv2d_1_w_bytes = 0; -args->conv2d_1_b = conv2d_1_b; -args->conv2d_1_b_bytes = 0; -args->conv2d_2_w = conv2d_2_w; -args->conv2d_2_w_bytes = 0; -args->conv2d_2_b = conv2d_2_b; -args->conv2d_2_b_bytes = 0; -args->conv2d_3_w = conv2d_3_w; -args->conv2d_3_w_bytes = 0; -args->conv2d_3_b = conv2d_3_b; -args->conv2d_3_b_bytes = 0; -args->conv2d_4_w = conv2d_4_w; -args->conv2d_4_w_bytes = 0; -args->conv2d_4_b = conv2d_4_b; -args->conv2d_4_b_bytes = 0; -args->conv2d_5_w = conv2d_5_w; -args->conv2d_5_w_bytes = 0; -args->conv2d_5_b = conv2d_5_b; -args->conv2d_5_b_bytes = 0; -args->conv2d_6_w = conv2d_6_w; -args->conv2d_6_w_bytes = 0; -args->conv2d_6_b = conv2d_6_b; -args->conv2d_6_b_bytes = 0; -args->conv2d_7_w = conv2d_7_w; -args->conv2d_7_w_bytes = 0; -args->conv2d_7_b = conv2d_7_b; -args->conv2d_7_b_bytes = 0; -args->conv2d_8_w = conv2d_8_w; -args->conv2d_8_w_bytes = 0; -args->conv2d_8_b = conv2d_8_b; -args->conv2d_8_b_bytes = 0; -args->conv2d_9_w = conv2d_9_w; -args->conv2d_9_w_bytes = 0; -args->conv2d_9_b = conv2d_9_b; -args->conv2d_9_b_bytes = 0; -args->conv2d_10_w = conv2d_10_w; -args->conv2d_10_w_bytes = 0; -args->conv2d_10_b = conv2d_10_b; -args->conv2d_10_b_bytes = 0; -args->conv2d_11_w = conv2d_11_w; -args->conv2d_11_w_bytes = 0; -args->conv2d_11_b = conv2d_11_b; -args->conv2d_11_b_bytes = 0; -args->conv2d_12_w = conv2d_12_w; -args->conv2d_12_w_bytes = 0; -args->conv2d_12_b = conv2d_12_b; -args->conv2d_12_b_bytes = 0; 
-args->conv2d_13_w = conv2d_13_w; -args->conv2d_13_w_bytes = 0; -args->conv2d_13_b = conv2d_13_b; -args->conv2d_13_b_bytes = 0; -args->dense_1_w = dense_1_w; -args->dense_1_w_bytes = 0; -args->dense_1_b = dense_1_b; -args->dense_1_b_bytes = 0; -args->dense_2_w = dense_2_w; -args->dense_2_w_bytes = 0; -args->dense_2_b = dense_2_b; -args->dense_2_b_bytes = 0; - -void* dfg = __visc__launch(0, root, (void*) args); - -__visc__wait(dfg); - -void *result = static_cast<RootIn*>(args)->input; -hpvm_request_tensor(result, 0); - -__visc__cleanup(); - computeAccuracy2(labels, 10000, result); -return 0; - -} diff --git a/hpvm/projects/keras/legacy/generated/vgg16/src.cc b/hpvm/projects/keras/legacy/generated/vgg16/src.cc deleted file mode 100644 index 9303866d0d29d1990c858f84ccaced7f0fc0dcc7..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/legacy/generated/vgg16/src.cc +++ /dev/null @@ -1,164 +0,0 @@ - -#include <stdio.h> -#include <stdlib.h> -#include <unistd.h> -#include <fcntl.h> -#include <sys/types.h> -#include <sys/stat.h> -#include <string.h> -#include "../../tensor_runtime/include/tensor_runtime.h" -#include "../include/utils.h" - -int main(){ - -llvm_hpvm_initTensorRt(0); - - -std::string dir_prefix = std::string("data/vgg16_cifar10/"); -std::string input_path = dir_prefix + std::string("input.bin"); -std::string labels_path = dir_prefix + std::string("labels.bin"); -std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); -void* conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0,64,3,3,3); -std::string conv2d_1_b_path = dir_prefix + std::string("conv2d_1_b.bin"); -void* conv2d_1_b = readTrainedWeights(conv2d_1_b_path.c_str(), 0,1,64,1,1); -std::string conv2d_2_w_path = dir_prefix + std::string("conv2d_2_w.bin"); -void* conv2d_2_w = readTrainedWeights(conv2d_2_w_path.c_str(), 0,64,64,3,3); -std::string conv2d_2_b_path = dir_prefix + std::string("conv2d_2_b.bin"); -void* conv2d_2_b = readTrainedWeights(conv2d_2_b_path.c_str(), 
0,1,64,1,1); -std::string conv2d_3_w_path = dir_prefix + std::string("conv2d_3_w.bin"); -void* conv2d_3_w = readTrainedWeights(conv2d_3_w_path.c_str(), 0,128,64,3,3); -std::string conv2d_3_b_path = dir_prefix + std::string("conv2d_3_b.bin"); -void* conv2d_3_b = readTrainedWeights(conv2d_3_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_4_w_path = dir_prefix + std::string("conv2d_4_w.bin"); -void* conv2d_4_w = readTrainedWeights(conv2d_4_w_path.c_str(), 0,128,128,3,3); -std::string conv2d_4_b_path = dir_prefix + std::string("conv2d_4_b.bin"); -void* conv2d_4_b = readTrainedWeights(conv2d_4_b_path.c_str(), 0,1,128,1,1); -std::string conv2d_5_w_path = dir_prefix + std::string("conv2d_5_w.bin"); -void* conv2d_5_w = readTrainedWeights(conv2d_5_w_path.c_str(), 0,256,128,3,3); -std::string conv2d_5_b_path = dir_prefix + std::string("conv2d_5_b.bin"); -void* conv2d_5_b = readTrainedWeights(conv2d_5_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_6_w_path = dir_prefix + std::string("conv2d_6_w.bin"); -void* conv2d_6_w = readTrainedWeights(conv2d_6_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_6_b_path = dir_prefix + std::string("conv2d_6_b.bin"); -void* conv2d_6_b = readTrainedWeights(conv2d_6_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_7_w_path = dir_prefix + std::string("conv2d_7_w.bin"); -void* conv2d_7_w = readTrainedWeights(conv2d_7_w_path.c_str(), 0,256,256,3,3); -std::string conv2d_7_b_path = dir_prefix + std::string("conv2d_7_b.bin"); -void* conv2d_7_b = readTrainedWeights(conv2d_7_b_path.c_str(), 0,1,256,1,1); -std::string conv2d_8_w_path = dir_prefix + std::string("conv2d_8_w.bin"); -void* conv2d_8_w = readTrainedWeights(conv2d_8_w_path.c_str(), 0,512,256,3,3); -std::string conv2d_8_b_path = dir_prefix + std::string("conv2d_8_b.bin"); -void* conv2d_8_b = readTrainedWeights(conv2d_8_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_9_w_path = dir_prefix + std::string("conv2d_9_w.bin"); -void* conv2d_9_w = readTrainedWeights(conv2d_9_w_path.c_str(), 
0,512,512,3,3); -std::string conv2d_9_b_path = dir_prefix + std::string("conv2d_9_b.bin"); -void* conv2d_9_b = readTrainedWeights(conv2d_9_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_10_w_path = dir_prefix + std::string("conv2d_10_w.bin"); -void* conv2d_10_w = readTrainedWeights(conv2d_10_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_10_b_path = dir_prefix + std::string("conv2d_10_b.bin"); -void* conv2d_10_b = readTrainedWeights(conv2d_10_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_11_w_path = dir_prefix + std::string("conv2d_11_w.bin"); -void* conv2d_11_w = readTrainedWeights(conv2d_11_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_11_b_path = dir_prefix + std::string("conv2d_11_b.bin"); -void* conv2d_11_b = readTrainedWeights(conv2d_11_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_12_w_path = dir_prefix + std::string("conv2d_12_w.bin"); -void* conv2d_12_w = readTrainedWeights(conv2d_12_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_12_b_path = dir_prefix + std::string("conv2d_12_b.bin"); -void* conv2d_12_b = readTrainedWeights(conv2d_12_b_path.c_str(), 0,1,512,1,1); -std::string conv2d_13_w_path = dir_prefix + std::string("conv2d_13_w.bin"); -void* conv2d_13_w = readTrainedWeights(conv2d_13_w_path.c_str(), 0,512,512,3,3); -std::string conv2d_13_b_path = dir_prefix + std::string("conv2d_13_b.bin"); -void* conv2d_13_b = readTrainedWeights(conv2d_13_b_path.c_str(), 0,1,512,1,1); -std::string dense_1_w_path = dir_prefix + std::string("dense_1_w.bin"); -void* dense_1_w = readTrainedWeights(dense_1_w_path.c_str(), 0,1,1,512,512); -std::string dense_1_b_path = dir_prefix + std::string("dense_1_b.bin"); -void* dense_1_b = readTrainedWeights(dense_1_b_path.c_str(), 0,1,512,1,1); -std::string dense_2_w_path = dir_prefix + std::string("dense_2_w.bin"); -void* dense_2_w = readTrainedWeights(dense_2_w_path.c_str(), 0,1,1,512,10); -std::string dense_2_b_path = dir_prefix + std::string("dense_2_b.bin"); -void* dense_2_b = 
readTrainedWeights(dense_2_b_path.c_str(), 0,1,10,1,1); - - - -startMemTracking(); - -int test_input_size = 10000; -int batch_size = 10000; -int batch_count = test_input_size / batch_size; -float final_accuracy = 0.0; - -for(int i = 0; i < batch_count; i++){ - -int start = i * batch_size; -int end = (i + 1) * batch_size; - -void* input = readInputBatch(input_path.c_str(),0,start,end,3,32,32); - -void* var_0 = tensorConvolution(input, conv2d_1_w, 1, 1, 1, 1, 1, 0); -void* var_1 = tensorAdd(var_0, conv2d_1_b); -void* var_2 = tensorRelu(var_1); -void* var_4 = tensorConvolution(var_2, conv2d_2_w, 1, 1, 1, 1, 1, 0); -void* var_5 = tensorAdd(var_4, conv2d_2_b); -void* var_6 = tensorRelu(var_5); -void* var_7 = tensorPooling(var_6,0,2,2,0,0,2,2); -void* var_8 = tensorConvolution(var_7, conv2d_3_w, 1, 1, 1, 1, 1, 0); -void* var_9 = tensorAdd(var_8, conv2d_3_b); -void* var_10 = tensorRelu(var_9); -void* var_12 = tensorConvolution(var_10, conv2d_4_w, 1, 1, 1, 1, 1, 0); -void* var_13 = tensorAdd(var_12, conv2d_4_b); -void* var_14 = tensorRelu(var_13); -void* var_15 = tensorPooling(var_14,0,2,2,0,0,2,2); -void* var_16 = tensorConvolution(var_15, conv2d_5_w, 1, 1, 1, 1, 1, 0); -void* var_17 = tensorAdd(var_16, conv2d_5_b); -void* var_18 = tensorRelu(var_17); -void* var_20 = tensorConvolution(var_18, conv2d_6_w, 1, 1, 1, 1, 1, 0); -void* var_21 = tensorAdd(var_20, conv2d_6_b); -void* var_22 = tensorRelu(var_21); -void* var_24 = tensorConvolution(var_22, conv2d_7_w, 1, 1, 1, 1, 1, 0); -void* var_25 = tensorAdd(var_24, conv2d_7_b); -void* var_26 = tensorRelu(var_25); -void* var_27 = tensorPooling(var_26,0,2,2,0,0,2,2); -void* var_28 = tensorConvolution(var_27, conv2d_8_w, 1, 1, 1, 1, 1, 0); -void* var_29 = tensorAdd(var_28, conv2d_8_b); -void* var_30 = tensorRelu(var_29); -void* var_32 = tensorConvolution(var_30, conv2d_9_w, 1, 1, 1, 1, 1, 0); -void* var_33 = tensorAdd(var_32, conv2d_9_b); -void* var_34 = tensorRelu(var_33); -void* var_36 = tensorConvolution(var_34, conv2d_10_w, 1, 
1, 1, 1, 1, 0); -void* var_37 = tensorAdd(var_36, conv2d_10_b); -void* var_38 = tensorRelu(var_37); -void* var_39 = tensorPooling(var_38,0,2,2,0,0,2,2); -void* var_40 = tensorConvolution(var_39, conv2d_11_w, 1, 1, 1, 1, 1, 0); -void* var_41 = tensorAdd(var_40, conv2d_11_b); -void* var_42 = tensorRelu(var_41); -void* var_44 = tensorConvolution(var_42, conv2d_12_w, 1, 1, 1, 1, 1, 0); -void* var_45 = tensorAdd(var_44, conv2d_12_b); -void* var_46 = tensorRelu(var_45); -void* var_48 = tensorConvolution(var_46, conv2d_13_w, 1, 1, 1, 1, 1, 0); -void* var_49 = tensorAdd(var_48, conv2d_13_b); -void* var_50 = tensorRelu(var_49); -void* var_51 = tensorPooling(var_50,0,2,2,0,0,2,2); -void* var_54 = tensorGemmGPU(var_51, dense_1_w); -void* var_55 = tensorAdd(var_54, dense_1_b); -void* var_56 = tensorRelu(var_55); -void* var_58 = tensorGemmGPU(var_56, dense_2_w); -void* var_59 = tensorAdd(var_58, dense_2_b); -void* var_60 = tensorSoftmax(var_59); - -uint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); - -float accuracy = computeAccuracy2(labels, batch_size, var_60); -final_accuracy += accuracy; -freeBatchMemory(); - -} - -final_accuracy = final_accuracy / batch_count; -dumpFinalAccuracy(final_accuracy); - - -llvm_hpvm_cleanupTensorRt(); - -return 0; - -} diff --git a/hpvm/projects/keras/legacy/keras_environment.yml b/hpvm/projects/keras/legacy/keras_environment.yml deleted file mode 100644 index caa3a773dfd8c7a82571a923bb1941997ba59ca9..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/legacy/keras_environment.yml +++ /dev/null @@ -1,321 +0,0 @@ -name: approxhpvm_keras -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - absl-py=0.6.1=py36_0 - - anaconda-project=0.8.2=py36_0 - - asn1crypto=0.24.0=py36_0 - - automat=0.7.0=py36_0 - - babel=2.6.0=py36_0 - - backports=1.0=py36_1 - - backports.os=0.1.1=py36_0 - - beautifulsoup4=4.6.3=py36_0 - - bkcharts=0.2=py36_0 - - blaze=0.11.3=py36_0 - - conda=4.5.11=py36_0 - - conda-env=2.6.0=1 - - 
contextlib2=0.5.5=py36_0 - - cycler=0.10.0=py36_0 - - dill=0.2.8.2=py36_0 - - docutils=0.14=py36_0 - - entrypoints=0.2.3=py36_2 - - et_xmlfile=1.0.1=py36_0 - - idna=2.7=py36_0 - - imageio=2.4.1=py36_0 - - importlib_metadata=0.6=py36_0 - - ipython_genutils=0.2.0=py36_0 - - isort=4.3.4=py36_0 - - jdcal=1.4=py36_0 - - jedi=0.13.1=py36_0 - - jinja2=2.10=py36_0 - - jmespath=0.9.3=py36_0 - - jsonschema=2.6.0=py36_0 - - keyring=16.0.0=py36_0 - - libgcc=7.2.0=h69d50b8_2 - - libgfortran=3.0.0=1 - - locket=0.2.0=py36_1 - - more-itertools=4.3.0=py36_0 - - nbconvert=5.3.1=py36_0 - - nbformat=4.4.0=py36_0 - - nose=1.3.7=py36_2 - - notebook=5.7.0=py36_0 - - numpydoc=0.8.0=py36_0 - - odo=0.5.1=py36_0 - - pathlib2=2.3.2=py36_0 - - pexpect=4.6.0=py36_0 - - pickleshare=0.7.5=py36_0 - - ply=3.11=py36_0 - - ptyprocess=0.6.0=py36_0 - - pycodestyle=2.4.0=py36_0 - - pygments=2.2.0=py36_0 - - pylint=2.1.1=py36_0 - - pyopenssl=18.0.0=py36_0 - - qtconsole=4.4.2=py36_0 - - requests=2.19.1=py36_0 - - s3transfer=0.1.13=py36_0 - - secretstorage=3.1.0=py36_0 - - setuptools=40.5.0=py36_0 - - singledispatch=3.4.0.3=py36_0 - - six=1.11.0=py36_1 - - snowballstemmer=1.2.1=py36_0 - - sortedcollections=1.0.1=py36_0 - - sphinx=1.8.1=py36_0 - - spyder=3.3.1=py36_1 - - sympy=1.3=py36_0 - - tblib=1.3.2=py36_0 - - termcolor=1.1.0=py36_1 - - terminado=0.8.1=py36_1 - - testpath=0.4.2=py36_0 - - torchvision=0.2.1=py36_0 - - traitlets=4.3.2=py36_0 - - typing=3.6.4=py36_0 - - unicodecsv=0.14.1=py36_0 - - urllib3=1.23=py36_0 - - wcwidth=0.1.7=py36_0 - - wheel=0.32.2=py36_0 - - widgetsnbextension=3.4.2=py36_0 - - xlwt=1.3.0=py36_0 - - _tflow_select=2.1.0=gpu - - alabaster=0.7.12=py36_0 - - anaconda-client=1.7.2=py36_0 - - anaconda=custom=py36hbbc8b67_0 - - anaconda-navigator=1.9.2=py36_0 - - appdirs=1.4.3=py36h28b3542_0 - - astor=0.7.1=py36_0 - - astroid=2.0.4=py36_0 - - astropy=3.0.5=py36h7b6447c_0 - - atomicwrites=1.2.1=py36_0 - - attrs=18.2.0=py36h28b3542_0 - - backcall=0.1.0=py36_0 - - 
backports.shutil_get_terminal_size=1.0.0=py36_2 - - bitarray=0.8.3=py36h14c3975_0 - - blas=1.0=mkl - - bleach=3.0.2=py36_0 - - blosc=1.14.4=hdbcaa40_0 - - bokeh=1.0.1=py36_0 - - boto=2.49.0=py36_0 - - boto3=1.9.35=py36_0 - - botocore=1.12.35=py36_0 - - bottleneck=1.2.1=py36h035aef0_1 - - bz2file=0.98=py36_1 - - bzip2=1.0.6=h14c3975_5 - - ca-certificates=2018.03.07=0 - - cairo=1.14.12=h8948797_3 - - certifi=2018.10.15=py36_0 - - cffi=1.11.5=py36he75722e_1 - - chardet=3.0.4=py36_1 - - chest=0.2.3=py36_1 - - click=7.0=py36_0 - - cloudpickle=0.6.1=py36_0 - - clyent=1.2.2=py36_1 - - colorama=0.4.0=py36_0 - - configobj=5.0.6=py36_1 - - constantly=15.1.0=py36h28b3542_0 - - cryptography=2.3.1=py36hc365091_0 - - cudatoolkit=9.0=h13b8566_0 - - cudnn=7.1.2=cuda9.0_0 - - cupti=9.0.176=0 - - curl=7.61.0=h84994c4_0 - - cython=0.29=py36he6710b0_0 - - cytoolz=0.9.0.1=py36h14c3975_1 - - dask=0.20.0=py36_0 - - dask-core=0.20.0=py36_0 - - datashape=0.5.4=py36_1 - - dbus=1.13.2=h714fa37_1 - - decorator=4.3.0=py36_0 - - defusedxml=0.5.0=py36_1 - - distributed=1.24.0=py36_0 - - expat=2.2.6=he6710b0_0 - - fastcache=1.0.2=py36h14c3975_2 - - filelock=3.0.10=py36_0 - - flask=1.0.2=py36_1 - - flask-cors=3.0.6=py36_0 - - fontconfig=2.13.0=h9420a91_0 - - freetype=2.9.1=h8a8886c_1 - - fribidi=1.0.5=h7b6447c_0 - - gast=0.2.0=py36_0 - - gensim=3.4.0=py36h14c3975_0 - - get_terminal_size=1.0.0=haa9412d_0 - - gevent=1.3.7=py36h7b6447c_1 - - glib=2.56.2=hd408876_0 - - glob2=0.6=py36_1 - - gmp=6.1.2=h6c8ec71_1 - - gmpy2=2.0.8=py36h10f8cd9_2 - - graphite2=1.3.12=h23475e2_2 - - greenlet=0.4.15=py36h7b6447c_0 - - grpcio=1.12.1=py36hdbcaa40_0 - - gst-plugins-base=1.14.0=hbbd80ab_1 - - gstreamer=1.14.0=hb453b48_1 - - h5py=2.8.0=py36h989c5e5_3 - - harfbuzz=1.8.8=hffaf4a1_0 - - hdf5=1.10.2=hba1933b_1 - - heapdict=1.0.0=py36_2 - - html5lib=1.0.1=py36_0 - - hyperlink=18.0.0=py36_0 - - icu=58.2=h9c2bf20_1 - - imagesize=1.1.0=py36_0 - - incremental=17.5.0=py36_0 - - ipykernel=5.1.0=py36h39e3cac_0 - - 
ipython=7.1.1=py36h39e3cac_0 - - ipywidgets=7.4.2=py36_0 - - itsdangerous=1.1.0=py36_0 - - jbig=2.1=hdba287a_0 - - jeepney=0.4=py36_0 - - jpeg=9b=h024ee3a_2 - - keras=2.1.6=py36_0 - - keras-applications=1.0.6=py36_0 - - keras-preprocessing=1.0.5=py36_0 - - kiwisolver=1.0.1=py36hf484d3e_0 - - lazy-object-proxy=1.3.1=py36h14c3975_2 - - libcurl=7.61.0=h1ad7b7a_0 - - libedit=3.1.20170329=h6b74fdf_2 - - libffi=3.2.1=hd88cf55_4 - - libgcc-ng=8.2.0=hdf63c60_1 - - libgfortran-ng=7.3.0=hdf63c60_0 - - libiconv=1.15=h63c8f33_5 - - libpng=1.6.35=hbc83047_0 - - libprotobuf=3.6.1=hd408876_0 - - libsodium=1.0.16=h1bed415_0 - - libssh2=1.8.0=h9cfc8f7_4 - - libstdcxx-ng=8.2.0=hdf63c60_1 - - libtiff=4.0.9=he85c1e1_2 - - libtool=2.4.6=h7b6447c_5 - - libuuid=1.0.3=h1bed415_2 - - libxcb=1.13=h1bed415_1 - - libxml2=2.9.8=h26e45fe_1 - - libxslt=1.1.32=h1312cb7_0 - - llvmlite=0.25.0=py36hd408876_0 - - lxml=4.2.5=py36hefd8a0e_0 - - lzo=2.10=h49e0be7_2 - - markdown=3.0.1=py36_0 - - markupsafe=1.0=py36h14c3975_1 - - matplotlib=3.0.1=py36h5429711_0 - - mccabe=0.6.1=py36_1 - - mistune=0.8.4=py36h7b6447c_0 - - mkl=2018.0.3=1 - - mkl-service=1.1.2=py36h90e4bf4_5 - - mkl_fft=1.0.6=py36h7dd41cf_0 - - mkl_random=1.0.1=py36h4414c95_1 - - mpc=1.1.0=h10f8cd9_1 - - mpfr=4.0.1=hdf1c602_3 - - mpmath=1.0.0=py36_2 - - msgpack-python=0.5.6=py36h6bb024c_1 - - multipledispatch=0.6.0=py36_0 - - navigator-updater=0.2.1=py36_0 - - nccl=1.3.5=cuda9.0_0 - - ncurses=6.1=hf484d3e_0 - - networkx=2.2=py36_1 - - ninja=1.8.2=py36h6bb024c_1 - - nltk=3.3.0=py36_0 - - numba=0.40.0=py36h962f231_0 - - numexpr=2.6.8=py36hd89afb7_0 - - numpy=1.15.3=py36h1d66e8a_0 - - numpy-base=1.15.3=py36h81de0dd_0 - - olefile=0.46=py36_0 - - openpyxl=2.5.9=py36_0 - - openssl=1.0.2p=h14c3975_0 - - packaging=18.0=py36_0 - - pandas=0.23.4=py36h04863e7_0 - - pandoc=2.2.3.2=0 - - pandocfilters=1.4.2=py36_1 - - pango=1.42.4=h049681c_0 - - parso=0.3.1=py36_0 - - partd=0.3.9=py36_0 - - patchelf=0.9=he6710b0_3 - - path.py=11.5.0=py36_0 - - 
patsy=0.5.1=py36_0 - - pcre=8.42=h439df22_0 - - pep8=1.7.1=py36_0 - - pillow=5.3.0=py36h34e0f95_0 - - pip=18.1=py36_0 - - pixman=0.34.0=hceecf20_3 - - pkginfo=1.4.2=py36_1 - - pluggy=0.8.0=py36_0 - - prometheus_client=0.4.2=py36_0 - - prompt_toolkit=2.0.7=py36_0 - - protobuf=3.6.1=py36he6710b0_0 - - psutil=5.4.8=py36h7b6447c_0 - - py=1.7.0=py36_0 - - pyasn1=0.4.4=py36h28b3542_0 - - pyasn1-modules=0.2.2=py36_0 - - pycosat=0.6.3=py36h14c3975_0 - - pycparser=2.19=py36_0 - - pycrypto=2.6.1=py36h14c3975_9 - - pycurl=7.43.0.2=py36hb7f436b_0 - - pyflakes=2.0.0=py36_0 - - pyhamcrest=1.9.0=py36_2 - - pyodbc=4.0.24=py36he6710b0_0 - - pyparsing=2.2.2=py36_0 - - pyqt=5.9.2=py36h05f1152_2 - - pysocks=1.6.8=py36_0 - - pytables=3.4.4=py36ha205bf6_0 - - pytest=3.9.3=py36_0 - - pytest-arraydiff=0.2=py36h39e3cac_0 - - pytest-astropy=0.4.0=py36_0 - - pytest-doctestplus=0.1.3=py36_0 - - pytest-openfiles=0.3.0=py36_0 - - pytest-remotedata=0.3.1=py36_0 - - python=3.6.6=h6e4f718_2 - - python-dateutil=2.7.5=py36_0 - - pytorch=0.4.1=py36ha74772b_0 - - pytz=2018.7=py36_0 - - pywavelets=1.0.1=py36hdd07704_0 - - pyyaml=3.13=py36h14c3975_0 - - pyzmq=17.1.2=py36h14c3975_0 - - qt=5.9.6=h8703b6f_2 - - qtawesome=0.5.2=py36_0 - - qtpy=1.5.2=py36_0 - - readline=7.0=h7b6447c_5 - - redis=5.0.0=h7b6447c_0 - - redis-py=2.10.6=py36_0 - - rope=0.11.0=py36_0 - - ruamel_yaml=0.15.46=py36h14c3975_0 - - scikit-image=0.14.0=py36hf484d3e_1 - - scikit-learn=0.20.0=py36h4989274_1 - - scipy=1.1.0=py36hfa4b5c9_1 - - seaborn=0.9.0=py36_0 - - send2trash=1.5.0=py36_0 - - service_identity=17.0.0=py36h28b3542_0 - - simplegeneric=0.8.1=py36_2 - - sip=4.19.8=py36hf484d3e_0 - - smart_open=1.7.1=py36_0 - - snappy=1.1.7=hbae5bb6_3 - - sockjs-tornado=1.0.6=py36_0 - - sortedcontainers=2.0.5=py36_0 - - sphinxcontrib=1.0=py36_1 - - sphinxcontrib-websupport=1.1.0=py36_1 - - spyder-kernels=0.2.6=py36_0 - - sqlalchemy=1.2.12=py36h7b6447c_0 - - sqlite=3.25.2=h7b6447c_0 - - statsmodels=0.9.0=py36h035aef0_0 - - 
tensorboard=1.11.0=py36hf484d3e_0 - - tensorflow=1.11.0=gpu_py36h4459f94_0 - - tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0 - - tensorflow-gpu=1.11.0=h0d30ee6_0 - - tk=8.6.8=hbc83047_0 - - toolz=0.9.0=py36_0 - - tornado=5.1.1=py36h7b6447c_0 - - tqdm=4.28.1=py36h28b3542_0 - - twisted=18.9.0=py36h7b6447c_0 - - typed-ast=1.1.0=py36h14c3975_0 - - unixodbc=2.3.7=h14c3975_0 - - webencodings=0.5.1=py36_1 - - werkzeug=0.14.1=py36_0 - - wrapt=1.10.11=py36h14c3975_2 - - xlrd=1.1.0=py36_1 - - xlsxwriter=1.1.2=py36_0 - - xz=5.2.4=h14c3975_4 - - yaml=0.1.7=had09818_2 - - zeromq=4.2.5=hf484d3e_1 - - zict=0.1.3=py36_0 - - zlib=1.2.11=ha838bed_2 - - zope=1.0=py36_1 - - zope.interface=4.6.0=py36h7b6447c_0 - - cuda91=1.0=h4c16780_0 - - pip: - - msgpack==0.5.6 - - tables==3.4.4 - - torch==0.4.1 - diff --git a/hpvm/projects/keras/legacy/keras_environment_deps.yml b/hpvm/projects/keras/legacy/keras_environment_deps.yml deleted file mode 100644 index 13876b26150ccbe01fb29cb8efc74a22bfbc3784..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/legacy/keras_environment_deps.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: approxhpvm_keras -channels: - - pytorch - - conda-forge - - defaults -dependencies: - - conda=4.5.11=py36_0 - - conda-env=2.6.0=1 - - libgcc=7.2.0=h69d50b8_2 - - libgfortran=3.0.0=1 - - torchvision=0.2.1=py36_0 - - _tflow_select=2.1.0=gpu - - anaconda-client=1.7.2=py36_0 - - anaconda=custom=py36hbbc8b67_0 - - anaconda-navigator=1.9.2=py36_0 - - cudatoolkit=9.0=h13b8566_0 - - cudnn=7.1.2=cuda9.0_0 - - cupti=9.0.176=0 - - curl=7.61.0=h84994c4_0 - - glib=2.56.2=hd408876_0 - - keras=2.1.6=py36_0 - - keras-applications=1.0.6=py36_0 - - keras-preprocessing=1.0.5=py36_0 - - mkl=2018.0.3=1 - - mkl-service=1.1.2=py36h90e4bf4_5 - - mkl_fft=1.0.6=py36h7dd41cf_0 - - mkl_random=1.0.1=py36h4414c95_1 - - ncurses=6.1=hf484d3e_0 - - numpy=1.15.3=py36h1d66e8a_0 - - numpy-base=1.15.3=py36h81de0dd_0 - - pandas=0.23.4=py36h04863e7_0 - - pandoc=2.2.3.2=0 - - pandocfilters=1.4.2=py36_1 - 
- pip=18.1=py36_0 - - psutil=5.4.8=py36h7b6447c_0 - - py=1.7.0=py36_0 - - pycurl=7.43.0.2=py36hb7f436b_0 - - python=3.6.6=h6e4f718_2 - - pytorch=0.4.1=py36ha74772b_0 - - scipy=1.1.0=py36hfa4b5c9_1 - - tensorboard=1.11.0=py36hf484d3e_0 - - tensorflow=1.11.0=gpu_py36h4459f94_0 - - tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0 - - tensorflow-gpu=1.11.0=h0d30ee6_0 - - zlib=1.2.11=ha838bed_2 - - cuda91=1.0=h4c16780_0 - - pip: - - msgpack==0.5.6 - - tables==3.4.4 - - torch==0.4.1 - diff --git a/hpvm/projects/keras/src/Benchmark.py b/hpvm/projects/keras/src/Benchmark.py index e83a78f4ea5d776a21ed6a6b47ccdb840f42c129..3610b2e9a5ad10c2b3d90795eb20b3d6839b730f 100644 --- a/hpvm/projects/keras/src/Benchmark.py +++ b/hpvm/projects/keras/src/Benchmark.py @@ -14,12 +14,14 @@ from frontend.weight_utils import reloadHPVMWeights # Defines common interfaces and virtual methods to be overridden by child classes class Benchmark: - def __init__(self, name, reload_dir, keras_model_file, hpvm_dir, num_classes): + def __init__(self, name, reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size=500): self.name = name self.reload_dir = reload_dir self.keras_model_file = keras_model_file - self.hpvm_dir = hpvm_dir + self.data_dir = data_dir + self.src_dir = src_dir self.num_classes = num_classes + self.batch_size = batch_size def buildModel(self): @@ -28,7 +30,7 @@ class Benchmark: def data_preprocess(self): return - def trainModel(self): + def trainModel(self, X_train, y_train, X_test, y_test): return def inference(self): @@ -51,7 +53,7 @@ class Benchmark: def printUsage(self): - print ("Usage: python ${benchmark.py} [hpvm_reload|keras_reload|train] [frontend] [compile]") + print ("Usage: python ${benchmark.py} [hpvm_reload|train] [frontend] [compile]") sys.exit(0) @@ -60,46 +62,61 @@ class Benchmark: if len(argv) < 2: self.printUsage() + print ("Build Model ...") # Virtual method call implemented by each CNN model = self.buildModel() + print ("Data Preprocess... 
\n") # Virtual method call to preprocess test and train data - X_train, Y_train, X_test, Y_test = self.data_preprocess() + X_train, y_train, X_test, y_test, X_tuner, y_tuner = self.data_preprocess() if argv[1] == "hpvm_reload": print ("loading weights .....\n\n") - model = reloadHPVMWeights(model, self.reload_dir, self.keras_model_file, X_test, Y_test) + model = reloadHPVMWeights(model, self.reload_dir, self.keras_model_file) elif argv[1] == "keras_reload": - model = load_model(self.keras_model_file) + model.load_weights(self.keras_model_file) + model.compile(loss='categorical_crossentropy', + optimizer='adam', + metrics=['accuracy']) elif argv[1] == "train": - model = self.trainModel(model) - + print ("Train Model ...") + model = self.trainModel(model, X_train, y_train, X_test, y_test) else: self.printUsage() - score = model.evaluate(X_test, to_categorical(Y_test, self.num_classes), verbose=0) + score = model.evaluate(X_test, to_categorical(y_test, self.num_classes), verbose=0) print('Test accuracy2:', score[1]) - if len(argv) > 2 and argv[2] == "frontend": - if argv[1] == "keras_reload": - print("ERROR: Must load HPVM model to invoke frontend - use 'hpvm_reload'") - sys.exit(1) + if len(argv) > 2: + if argv[2] == "frontend": + + # Main call to ApproxHPVM-Keras Frontend + working_dir = translate_to_approxhpvm(model, + self.data_dir, self.src_dir, ## "data/test_src/", + X_test, y_test, + X_tuner, y_tuner, + self.batch_size, # FIXIT + self.num_classes, + (argv[1] == "hpvm_reload")) # Do not redump HPVM weights if `hpvm_reload` used - # Main call to ApproxHPVM-Keras Frontend - working_dir = translate_to_approxhpvm(model, self.hpvm_dir, X_test, Y_test, self.num_classes) - #-- print ("*** working_dir = ", working_dir) - - if len(argv) > 3 and argv[3] == "compile": - self.compileSource(working_dir) + if len(argv) > 3 and argv[3] == "compile": + self.compileSource(working_dir) + else: + self.printUsage() - else: - self.printUsage() + if argv[2] == "keras_dump": + 
model.save_weights(self.keras_model_file) + + elif len(argv) > 2: self.printUsage() + + + diff --git a/hpvm/projects/keras/src/Config.py b/hpvm/projects/keras/src/Config.py new file mode 100644 index 0000000000000000000000000000000000000000..2edc5c1add5542edabdd052097ccb4b45d608472 --- /dev/null +++ b/hpvm/projects/keras/src/Config.py @@ -0,0 +1,3 @@ + +# Path Relative to Model Params Directory +MODEL_PARAMS_DIR = "../../../hpvm/test/dnn_benchmarks/model_params/" diff --git a/hpvm/projects/keras/src/__init__.py b/hpvm/projects/keras/src/__init__.py deleted file mode 100644 index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..0000000000000000000000000000000000000000 diff --git a/hpvm/projects/keras/src/alexnet.py b/hpvm/projects/keras/src/alexnet.py index 9bfe80a156ae21e9befea3a6872b63758e37e2a3..4b23fd995ffcc5a4f3234566a8a76dac8c12c6aa 100644 --- a/hpvm/projects/keras/src/alexnet.py +++ b/hpvm/projects/keras/src/alexnet.py @@ -1,179 +1,147 @@ +import os +import sys +import glob import numpy as np - -from keras.datasets import cifar10 -from keras.models import Sequential -from keras.models import load_model -from keras.layers.core import Dense, Dropout, Flatten, Activation -from keras.layers.convolutional import Conv2D +import tensorflow as tf +import scipy +import scipy.io +import keras +from keras.models import Model, Sequential +from keras.layers import * from keras.optimizers import Adam -from keras.layers.pooling import MaxPooling2D -from keras.utils.np_utils import to_categorical -from keras.preprocessing.image import ImageDataGenerator -from keras import backend as K from keras import regularizers +from keras import backend as K +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator from keras.callbacks import LearningRateScheduler -import sys -import struct -import keras -import numpy as np -import os -from Benchmark import Benchmark - +from keras.datasets import cifar10 +from Benchmark import Benchmark +from Config 
import MODEL_PARAMS_DIR -class AlexNet(Benchmark): - - - - def lr_schedule(self, epoch): - lrate = 0.001 - if epoch > 20: - lrate = 0.0005 - if epoch > 40: - lrate = 0.0003 - if epoch > 60: - lrate = 0.0001 - if epoch > 80: - lrate = 0.00005 - - return lrate +class AlexNet_CIFAR10(Benchmark): + def buildModel(self): - def buildModel(self): + activation_type = 'tanh' + weight_decay = 1e-4 - print ("BuildModel ...") - - activation_type = "tanh" - weight_decay = 1e-4 + model = Sequential() + + model.add(Conv2D(64, (11, 11), padding='same', activation=activation_type, + kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32))) + model.add(MaxPooling2D(2, 2)) + model.add(Dropout(0.2)) + + model.add(Conv2D(192, (5, 5), padding='same', activation=activation_type, + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(MaxPooling2D(2, 2)) + model.add(Dropout(0.3)) - model = Sequential() - model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, - input_shape=(3, 32, 32), padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.2)) - model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', + model.add(Conv2D(384, (3, 3), padding='same', activation=activation_type, + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type, kernel_regularizer=regularizers.l2(weight_decay))) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.3)) - - model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', - 
kernel_regularizer=regularizers.l2(weight_decay) )) - model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) - model.add(Dropout(0.4)) - - model.add(Flatten()) - #model.add(Flatten()) - #model.add(Dense(256)) - model.add(Dense(10)) - model.add(Activation('softmax')) - - return model - - - - - def trainModel(self, model): - - (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - test_labels = Y_test - train_labels = Y_train - - #X_train = X_train.astype('float32') - #X_test = X_test.astype('float32') - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train,axis=(0,1,2,3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - - dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" - - #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) - # Compile the model - model.compile(loss='categorical_crossentropy', - optimizer=Adam(lr=0.0001, decay=1e-6), - #optimizer = opt_rms, - metrics=['accuracy']) - - #print to_categorical(Y_train, 10) - print (to_categorical(Y_train)) - - - datagen = ImageDataGenerator( - rotation_range=15, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, - ) - datagen.fit(X_train) - - - model.fit(X_train, to_categorical(Y_train, 10), - batch_size=128, - shuffle=True, - epochs = 1, - #epochs=100, - validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(self.lr_schedule)]) + model.add(Conv2D(256, (3, 3), padding='same', activation=activation_type, + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(MaxPooling2D(2, 2)) + model.add(Dropout(0.4)) - # Evaluate the model - scores = model.evaluate(X_test, to_categorical(Y_test, 10)) + model.add(Flatten()) + #model.add(Flatten()) + #model.add(Dense(256)) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) + + return model - print('Loss: %.3f' % scores[0]) - print('Accuracy: 
%.3f' % scores[1]) - print ("*** TRAINED MODEL ****\n") + + def data_preprocess(self): - return model + (X_train, y_train), (X_val, y_val) = cifar10.load_data() + X_train = X_train / 255.0 + X_val = X_val / 255.0 + mean = np.mean(X_train) + std = np.std(X_train) + X_train = (X_train - mean) / (std + 1e-7) + X_val = (X_val - mean) / (std + 1e-7) - def data_preprocess(self): + X_test = X_val[0:5000] + y_test = y_val[0:5000] + X_tuner = X_val[5000:] + y_tuner = y_val[5000:] - print ("Data Preprocess... \n") + return X_train, y_train, X_test, y_test, X_tuner, y_tuner - (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - print ("Data Loaded... \n") - - X_train = X_train / 255.0 - X_test = X_test / 255.0 - - mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train,axis=(0,1,2,3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - - return X_train, Y_train, X_test, Y_test + def trainModel(self, model, X_train, y_train, X_test, y_test): + + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + + model.compile( + loss='categorical_crossentropy', + optimizer=Adam(lr=0.0001, decay=1e-6), + metrics=['accuracy'] + ) + + datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + datagen.fit(X_train) + + + def lr_schedule(epoch): + lrate = 0.001 + if epoch > 20: + lrate = 0.0005 + if epoch > 40: + lrate = 0.0003 + if epoch > 60: + lrate = 0.0001 + if epoch > 80: + lrate = 0.00005 + return lrate + + model.fit( + X_train, + y_train, + batch_size=128, + shuffle=True, + epochs=100, + validation_data=(X_test, y_test), + callbacks=[LearningRateScheduler(lr_schedule)] + ) + + return model - - -if __name__ == "__main__": - - - os.environ["CUDA_VISIBLE_DEVICES"] = "0" +if __name__ == '__main__': + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Changing to NCHW format K.set_image_data_format('channels_first') ### Parameters 
specific to each benchmark - reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" - keras_model_file = "alexnet.h5" - hpvm_dir = "data/alexnet_cifar10/" + reload_dir = MODEL_PARAMS_DIR + '/alexnet_cifar10/' + keras_model_file = MODEL_PARAMS_DIR + '/alexnet_cifar10/weights.h5' + data_dir = '/alexnet_cifar10/' + src_dir = 'data/alexnet_cifar10_src/' num_classes = 10 - - alexnet = AlexNet("AlexNet", reload_dir, keras_model_file, hpvm_dir, num_classes) + batch_size = 500 + + model = AlexNet_CIFAR10('AlexNet_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) - alexnet.run(sys.argv) + model.run(sys.argv) diff --git a/hpvm/projects/keras/src/alexnet2.py b/hpvm/projects/keras/src/alexnet2.py index e29917b26f4c59472148c6c1cbb3babf785d5b5d..de69d8c12972df7a1fa51338b30676ffafc65f4e 100644 --- a/hpvm/projects/keras/src/alexnet2.py +++ b/hpvm/projects/keras/src/alexnet2.py @@ -1,154 +1,143 @@ - +import os import sys +import glob + +import numpy as np +import tensorflow as tf +import scipy +import scipy.io import keras -from keras.models import Sequential -from keras.utils import np_utils -from keras.preprocessing.image import ImageDataGenerator -from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization -from keras.layers import Conv2D, MaxPooling2D -from keras.datasets import cifar10 +from keras.models import Model, Sequential +from keras.layers import * +from keras.optimizers import Adam from keras import regularizers -from keras.callbacks import LearningRateScheduler -import numpy as np -import os -import struct -from Benchmark import Benchmark from keras import backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - - -class AlexNet2(Benchmark): - - - - def lr_schedule2(self, epoch): - lrate = 0.0005 - if epoch > 100: - lrate = 0.0003 - if epoch > 200: - lrate = 0.0002 - if epoch > 250: - lrate = 0.0001 - if epoch > 300: - lrate = 0.00003 - - 
return lrate +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler - +from keras.datasets import cifar10 +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR - def buildModel(self): - weight_decay = 1e-4 - activation_type = 'tanh' - model = Sequential() - model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32) )) - model.add(Activation(activation_type)) - model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation(activation_type)) - model.add(MaxPooling2D(pool_size=(2,2))) - model.add(Dropout(0.2)) +class AlexNet2_CIFAR10(Benchmark): - model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation(activation_type)) - model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation(activation_type)) - model.add(MaxPooling2D(pool_size=(2,2))) - model.add(Dropout(0.3)) + def buildModel(self): - model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation(activation_type)) - model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) - model.add(Activation(activation_type)) - model.add(MaxPooling2D(pool_size=(2,2))) - model.add(Dropout(0.4)) + weight_decay = 1e-4 + activation_type = 'tanh' - model.add(Flatten()) - model.add(Dense(self.num_classes)) - model.add(Activation('softmax')) - model.summary() + model = Sequential() + model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=(3, 32, 32))) + model.add(Activation(activation_type)) + model.add(Conv2D(32, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation(activation_type)) + 
model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.2)) - return model + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation(activation_type)) + model.add(Conv2D(64, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation(activation_type)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.3)) + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation(activation_type)) + model.add(Conv2D(128, (3, 3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation(activation_type)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.4)) + model.add(Flatten()) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) - def trainModel(self, model): + return model - (x_train, y_train), (x_test, y_test) = cifar10.load_data() + + def data_preprocess(self): - test_labels = y_test - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') + (X_train, y_train), (X_val, y_val) = cifar10.load_data() - #z-score - mean = np.mean(x_train,axis=(0,1,2,3)) - std = np.std(x_train,axis=(0,1,2,3)) - x_train = (x_train-mean)/(std+1e-7) - x_test = (x_test-mean)/(std+1e-7) + X_train = X_train / 255.0 + X_val = X_val / 255.0 - y_train = np_utils.to_categorical(y_train, self.num_classes) - y_test = np_utils.to_categorical(y_test, self.num_classes) + mean = np.mean(X_train) + std = np.std(X_train) + X_train = (X_train - mean) / (std + 1e-7) + X_val = (X_val - mean) / (std + 1e-7) - #data augmentation - datagen = ImageDataGenerator( - rotation_range=15, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, - ) + X_test = X_val[0:5000] + y_test = y_val[0:5000] + X_tuner = X_val[5000:] + y_tuner = y_val[5000:] - datagen.fit(x_train) + return X_train, y_train, X_test, y_test, X_tuner, y_tuner + - #training - 
batch_size = 64 - opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) + def trainModel(self, model, X_train, y_train, X_test, y_test): + + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) - model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy']) + model.compile( + loss='categorical_crossentropy', + optimizer=Adam(lr=0.0001), + metrics=['accuracy'] + ) - model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\ - steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\ - epochs=3, - verbose=1,validation_data=(x_test,y_test), \ - callbacks=[LearningRateScheduler(self.lr_schedule2)]) + datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + datagen.fit(X_train) + + + def lr_schedule(epoch): + lrate = 0.001 + if epoch > 20: + lrate = 0.0005 + if epoch > 40: + lrate = 0.0003 + if epoch > 60: + lrate = 0.0001 + return lrate + + model.fit( + X_train, + y_train, + batch_size=128, + shuffle=True, + epochs=100, + validation_data=(X_test, y_test), + callbacks=[LearningRateScheduler(lr_schedule)] + ) return model - - def data_preprocess(self): - - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - - #z-score - mean = np.mean(x_train,axis=(0,1,2,3)) - std = np.std(x_train,axis=(0,1,2,3)) - x_train = (x_train-mean)/(std+1e-7) - x_test = (x_test-mean)/(std+1e-7) - - return x_train, y_train, x_test, y_test - - - - -if __name__ == "__main__": +if __name__ == '__main__': - os.environ["CUDA_VISIBLE_DEVICES"] = "0" + os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Changing to NCHW format K.set_image_data_format('channels_first') ### Parameters specific to each benchmark - reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/" - keras_model_file = "alexnet2.h5" - hpvm_dir = 
"data/alexnet2_cifar10/" + reload_dir = MODEL_PARAMS_DIR + '/alexnet2_cifar10/' + keras_model_file = MODEL_PARAMS_DIR + '/alexnet2_cifar10/weights.h5' + data_dir = '/alexnet2_cifar10/' + src_dir = 'data/alexnet2_cifar10_src/' num_classes = 10 + batch_size = 500 - alexnet2 = AlexNet2("AlexNet2", reload_dir, keras_model_file, hpvm_dir, num_classes) + model = AlexNet2_CIFAR10('AlexNet2_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) - alexnet2.run(sys.argv) + model.run(sys.argv) diff --git a/hpvm/projects/keras/src/alexnet_imagenet.py b/hpvm/projects/keras/src/alexnet_imagenet.py index 41cf2d837cf8ba764b614512f40a0f6c0522ab3d..e3ab937e9bb355fde74a63664c8657c76d6343f5 100644 --- a/hpvm/projects/keras/src/alexnet_imagenet.py +++ b/hpvm/projects/keras/src/alexnet_imagenet.py @@ -1,259 +1,107 @@ import os +import sys import glob -import random -import scipy -import scipy.io -import cv2 import numpy as np - import tensorflow as tf +import scipy +import scipy.io import keras -from keras.models import Sequential, Model +from keras.models import Model, Sequential from keras.layers import * -from keras.utils import to_categorical +from keras.optimizers import Adam +from keras import regularizers from keras import backend as K -import torchvision.models as models - -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData2 - - -np.random.seed(2020) - -os.environ["CUDA_VISIBLE_DEVICES"] = "1" - -K.set_image_data_format('channels_first') - -data_format = 'channels_first' - - -IMAGENET_DIR = '/home/nz11/ILSVRC2012/' -OUTPUT_DIR = 'data/alexnet_imagenet_tune/' -WEIGHTS_PATH = 'data/alexnet_imagenet_tune/weights.h5' - -NUM_CLASSES = 200 -IMAGES_PER_CLASS = 50 -# VAL_SIZE = 100 - - - -def get_alexnet_nchw_keras(): - - input_layer = Input((3, 224, 224)) - - x = ZeroPadding2D((2, 2))(input_layer) - x = Conv2D(64, (11, 11), strides=4, padding='valid')(x) - x = Activation('relu')(x) - x = 
MaxPooling2D(3, 2)(x) - - x = ZeroPadding2D((2, 2))(x) - x = Conv2D(192, (5, 5), padding='valid')(x) - x = Activation('relu')(x) - x = MaxPooling2D(3, 2)(x) - - x = Conv2D(384, (3, 3), padding='same')(x) - x = Activation('relu')(x) - - x = Conv2D(256, (3, 3), padding='same')(x) - x = Activation('relu')(x) - - x = Conv2D(256, (3, 3), padding='same')(x) - x = Activation('relu')(x) - - x = MaxPooling2D(3, 2)(x) - - x = Flatten()(x) - x = Dropout(0.5)(x) - x = Dense(4096)(x) - x = Activation('relu')(x) - x = Dropout(0.5)(x) - x = Dense(4096)(x) - x = Activation('relu')(x) - x = Dense(1000)(x) - x = Activation('softmax')(x) - - model_nchw = Model(input_layer, x) - - - torch_model = models.alexnet(pretrained=True) - - j = 0 - torch_weights = list(torch_model.parameters()) - for i in range(len(model_nchw.layers)): - if (2 * j >= len(torch_weights)): - break +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler - w = torch_weights[2 * j].detach().numpy() - b = torch_weights[2 * j + 1].detach().numpy() +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR - if (len(w.shape) == 4): - w = np.transpose(w, (2, 3, 1, 0)) - else: - w = w.transpose() - try: - model_nchw.layers[i].set_weights([w, b]) - j += 1 - print ([w.shape, b.shape], 'loaded') - except: - pass - - return model_nchw +class AlexNet(Benchmark): -def load_image(x): - - image = cv2.imread(x) + def data_preprocess(self): + X_train, y_train = None, None - height, width, _ = image.shape - new_height = height * 256 // min(image.shape[:2]) - new_width = width * 256 // min(image.shape[:2]) - image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC) - - height, width, _ = image.shape - startx = width // 2 - (224 // 2) - starty = height // 2 - (224 // 2) - image = image[starty:starty + 224, startx:startx + 224] - - image = image[:, :, ::-1] - image = np.transpose(image, (2, 0, 1)) - - - 
image[:, :, 0] = (image[:, :, 0] - 0.485) / 0.229 - image[:, :, 1] = (image[:, :, 1] - 0.456) / 0.224 - image[:, :, 2] = (image[:, :, 2] - 0.406) / 0.225 - + X_test = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_input.bin', dtype=np.float32) + X_test = X_test.reshape((-1, 3, 224, 224)) + y_test = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/test_labels.bin', dtype=np.uint32) - return image.astype(np.float32) - - -meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat') -original_idx_to_synset = {} -synset_to_name = {} - -for i in range(1000): - ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0]) - synset = meta['synsets'][i,0][1][0] - name = meta['synsets'][i,0][2][0] - original_idx_to_synset[ilsvrc2012_id] = synset - synset_to_name[synset] = name - -synset_to_keras_idx = {} -keras_idx_to_name = {} -f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r') -c = 0 -for line in f: - parts = line.split(' ') - synset_to_keras_idx[parts[0]] = c - keras_idx_to_name[c] = ' '.join(parts[1:]) - c += 1 -f.close() - - - - -model = get_alexnet_nchw_keras() + X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/tune_input.bin', dtype=np.float32) + X_tuner = X_tuner.reshape((-1, 3, 224, 224)) + y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/alexnet_imagenet/tune_labels.bin', dtype=np.uint32) + + return X_train, y_train, X_test, y_test, X_tuner, y_tuner -X_tune, X_test = [], [] -y_tune, y_true = [], [] -classes = glob.glob(IMAGENET_DIR + 'val/*') + def buildModel(self): -for c in np.random.permutation(len(classes))[:NUM_CLASSES]: - x = glob.glob(classes[c] + '/*') - x = np.array(x) - - idx = np.random.permutation(len(x)) - idx = idx[:max(len(idx), IMAGES_PER_CLASS)] - - synset = classes[c].split('/')[-1] - images = list(map(lambda x : load_image(x), x[idx])) - labels = [synset_to_keras_idx[synset]] * len(x[idx]) - - X_test += images[:IMAGES_PER_CLASS // 2] - y_true += labels[:IMAGES_PER_CLASS // 2] - - X_tune += 
images[IMAGES_PER_CLASS // 2:] - y_tune += labels[IMAGES_PER_CLASS // 2:] - - -X_test = np.array(X_test) -y_true = np.array(y_true) -X_tune = np.array(X_tune) -y_tune = np.array(y_tune) + input_layer = Input((3, 224, 224)) -print ('tune size', len(X_tune)) -print ('test size', len(X_test)) + x = ZeroPadding2D((2, 2))(input_layer) + x = Conv2D(64, (11, 11), strides=4, padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D(3, 2)(x) + x = ZeroPadding2D((2, 2))(x) + x = Conv2D(192, (5, 5), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D(3, 2)(x) + x = Conv2D(384, (3, 3), padding='same')(x) + x = Activation('relu')(x) + x = Conv2D(256, (3, 3), padding='same')(x) + x = Activation('relu')(x) + x = Conv2D(256, (3, 3), padding='same')(x) + x = Activation('relu')(x) + x = MaxPooling2D(3, 2)(x) -def train_helper(x): - - try: - x = x.decode('utf-8') - except: - pass - - image = load_image(x) - - y = np.zeros(1000, dtype=np.uint8) - - y[synset_to_keras_idx[x.split('/')[-2]]] = 1 + x = Flatten()(x) + x = Dropout(0.5)(x) + x = Dense(4096)(x) + x = Activation('relu')(x) + x = Dropout(0.5)(x) + x = Dense(4096)(x) + x = Activation('relu')(x) + x = Dense(self.num_classes)(x) + x = Activation('softmax')(x) - return image, y - - + model = Model(input_layer, x) -train_images = glob.glob(IMAGENET_DIR + 'train/*/*') -random.shuffle(train_images) + return model -dataset = tf.data.Dataset().from_tensor_slices(train_images) -dataset = dataset.map( - lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), - num_parallel_calls=16 -) -dataset = dataset.shuffle(buffer_size=1000) -dataset = dataset.batch(64) -dataset = dataset.repeat() + def trainModel(self, model, X_train, y_train, X_test, y_test): -next_element = dataset.make_one_shot_iterator().get_next() + assert False, "ImageNet training not supported - use Pretrained weights" -sess = tf.Session() -def generate(): - while True: - yield sess.run(next_element) +if __name__ == '__main__': + + 
os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + K.set_image_data_format('channels_first') -model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc']) - -if os.path.exists(WEIGHTS_PATH): - model.load_weights(WEIGHTS_PATH) -else: - pass -# model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=3) -# K.set_value(model.optimizer.lr, 0.000001) -# model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=3) - -translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000) - -# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true) -# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune) -# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true) + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + '/alexnet_imagenet/' + keras_model_file = MODEL_PARAMS_DIR + '/alexnet_imagenet/weights.h5' + data_dir = '/alexnet_imagenet/' + src_dir = 'data/alexnet_imagenet_src/' + num_classes = 1000 + batch_size = 50 -pred = np.argmax(model.predict(X_test), axis=1) -print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test)) + model = AlexNet('AlexNet_Imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) -pred = np.argmax(model.predict(X_tune), axis=1) -print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune)) + model.run(sys.argv) -model.save_weights(OUTPUT_DIR + '/weights.h5') \ No newline at end of file diff --git a/hpvm/projects/keras/src/legacy/lenet_conv_test.py b/hpvm/projects/keras/src/legacy/lenet_conv_test.py deleted file mode 100644 index c9588eef6c393457617b7fdda03c7b8222af5357..0000000000000000000000000000000000000000 --- 
a/hpvm/projects/keras/src/legacy/lenet_conv_test.py +++ /dev/null @@ -1,97 +0,0 @@ - -import sys -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten, Activation -from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D -from keras import backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - -batch_size = 128 -num_classes = 10 - - -# input image dimensions -img_rows, img_cols = 28, 28 - - -if __name__ == "__main__": - - # Changing Keras data format to NCHW - NHWC is default - # NOTE: ApproxHPVM requires NCHW format - K.set_image_data_format('channels_first') - - # Loads Mnist dataset - (x_train, y_train), (x_test, y_test) = mnist.load_data() - test_labels = y_test - - # Reshaping data to be NCHW format - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) - - - # Data Normalization - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - - - # convert class vectors to binary class matrices - required by Keras - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - - - - # Network Compostion: 3 Conv Layers, 2 Dense Layers - model = Sequential() - - # ConvLayer1 - model.add(Conv2D(32, kernel_size=(5, 5), - activation='relu', - padding = 'same', - input_shape=input_shape)) - model.add(MaxPooling2D(pool_size=(2, 2))) - - # ConvLayer2 - model.add(Conv2D(64, (5, 5), activation='relu', padding = 'same')) - - # ConvLayer3 - # NOTE: ZeroPading needed for ConvLayer with strides > 1 - model.add(ZeroPadding2D(padding = (1,1))) - model.add(Conv2D(64, (3, 3), strides = (2,2), activation='relu', padding = 'valid') ) - - model.add(Flatten()) - # DenseLayer1 - model.add(Dense(1024, activation='relu')) - # DenseLayer2 - 
model.add(Dense(num_classes, activation='relu')) - # Softmax Layer - model.add(Activation('softmax')) - - - # Configures model for training - model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adadelta(), - metrics=['accuracy']) - - # Training - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=5, - verbose=1, - validation_data=(x_test, y_test)) - - - # Inference - score = model.evaluate(x_test, y_test, verbose=0) - print('Test loss:', score[0]) - print('Test accuracy:', score[1]) - - - # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src - translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10) - diff --git a/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py b/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py deleted file mode 100644 index c5cef5193b203633015a5dc8f4be065991bf5608..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/src/legacy/mobilenet_imagenet.py +++ /dev/null @@ -1,263 +0,0 @@ -import os -import glob -import random - -import scipy -import scipy.io -import cv2 -import numpy as np - -import tensorflow as tf -import keras -from keras.models import Sequential, Model -from keras.layers import * -from keras.applications.mobilenet import MobileNet, preprocess_input -from keras.utils import to_categorical -from keras import backend as K - -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData - - -np.random.seed(2020) - -os.environ["CUDA_VISIBLE_DEVICES"] = "1" - -K.set_image_data_format('channels_first') - -data_format = 'channels_first' - - -IMAGENET_DIR = '/home/nz11/ILSVRC2012/' -OUTPUT_DIR = 'data/mobilenet_imagenet/' - -NUM_CLASSES = 100 -IMAGES_PER_CLASS = 200 -VAL_SIZE = 100 - - - -def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): - channel_axis = 1 - filters = int(filters * alpha) - - x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs) - x = 
Conv2D(filters, kernel, - padding='valid', - use_bias=False, - strides=strides)(x) - x = BatchNormalization(axis=channel_axis)(x) - return Activation('relu')(x) - - -def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, - depth_multiplier=1, strides=(1, 1), block_id=1): - channel_axis = 1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - if strides != (1, 1): - x = ZeroPadding2D(padding=((0, 1), (0, 1)))(inputs) - else: - x = inputs - - x = DepthwiseConv2D((3, 3), - padding='same' if strides == (1, 1) else 'valid', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1))(x) - x = BatchNormalization(axis=channel_axis)(x) - return Activation('relu')(x) - - - -def get_mobilenet_nchw_keras(): - - alpha=1.0 - dropout=1e-3 - depth_multiplier=1 - - img_input = Input(shape=(3, 224, 224)) - - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, - strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, - strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, - strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - - x = _depthwise_conv_block(x, 1024, alpha, 
depth_multiplier, - strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - - - x = AveragePooling2D((7, 7))(x) - x = Conv2D(1000, (1, 1), - padding='same')(x) - x = Flatten()(x) - x = Activation('softmax')(x) - - - model = Model(img_input, x) - - - K.set_image_data_format('channels_last') - original_model = MobileNet() - K.set_image_data_format('channels_first') - - j = 0 - for i in range(0, len(original_model.layers)): - try: - model.layers[j].set_weights(original_model.layers[i].get_weights()) - print (j, 'loaded') -# model.layers[j].trainable = False - j += 1 - except: - print (j, 'skipped', model.layers[j]) - - return model - - - -def load_image(x): - image = cv2.imread(x) - - height, width, _ = image.shape - new_height = height * 256 // min(image.shape[:2]) - new_width = width * 256 // min(image.shape[:2]) - image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC) - - height, width, _ = image.shape - startx = width // 2 - (224 // 2) - starty = height // 2 - (224 // 2) - image = image[starty:starty + 224, startx:startx + 224] - - image = image[:, :, ::-1] - image = np.transpose(image, (2, 0, 1)) - image = preprocess_input(image.astype(np.float32)) - - return image.astype(np.float32) - - -meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat') -original_idx_to_synset = {} -synset_to_name = {} - -for i in range(1000): - ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0]) - synset = meta['synsets'][i,0][1][0] - name = meta['synsets'][i,0][2][0] - original_idx_to_synset[ilsvrc2012_id] = synset - synset_to_name[synset] = name - -synset_to_keras_idx = {} -keras_idx_to_name = {} -f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r') -c = 0 -for line in f: - parts = line.split(' ') - synset_to_keras_idx[parts[0]] = c - keras_idx_to_name[c] = ' '.join(parts[1:]) - c += 1 -f.close() - - - -model = get_mobilenet_nchw_keras() - -X_test = [] -y_true = [] - 
-classes = glob.glob(IMAGENET_DIR + 'val/*') - -for c in np.random.permutation(len(classes))[:NUM_CLASSES]: - x = glob.glob(classes[c] + '/*') - x = np.array(x) - - idx = np.random.permutation(len(x)) - idx = idx[:max(len(idx), IMAGES_PER_CLASS)] - - X_test += list(map(lambda x : load_image(x), x[idx])) - - synset = classes[c].split('/')[-1] - y_true += [synset_to_keras_idx[synset]] * len(x[idx]) - -X_test = np.array(X_test) -y_true = np.array(y_true) - - - - -# def train_helper(x): - -# try: -# x = x.decode('utf-8') -# except: -# pass - -# image = load_image(x) - -# y = np.zeros(1000, dtype=np.uint8) - -# y[synset_to_keras_idx[x.split('/')[-2]]]= 1 - -# return image, y - - -# train_images = glob.glob(IMAGENET_DIR + 'train/*/*') -# random.shuffle(train_images) - -# dataset = tf.data.Dataset().from_tensor_slices(train_images) -# dataset = dataset.map( -# lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), -# num_parallel_calls=16 -# ) - -# dataset = dataset.shuffle(buffer_size=1000) -# dataset = dataset.batch(32) -# dataset = dataset.repeat() - -# next_element = dataset.make_one_shot_iterator().get_next() - -# sess = tf.Session() - -# def generate(): -# while True: -# yield sess.run(next_element) - - - -# model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc']) - - -# model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=7) - - -translate_to_approxhpvm(model, OUTPUT_DIR, X_test[:VAL_SIZE], y_true[:VAL_SIZE], 1000) - -dumpCalibrationData(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true) - - -# pred = np.argmax(model.predict(X_test), axis=1) -# print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test)) - - \ No newline at end of file diff --git a/hpvm/projects/keras/src/legacy/mobilenet_shallow.py b/hpvm/projects/keras/src/legacy/mobilenet_shallow.py deleted file mode 100644 index 
64df7f98174f22a59f3382ed4337d23e29900051..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/src/legacy/mobilenet_shallow.py +++ /dev/null @@ -1,158 +0,0 @@ -import sys -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '0' - -from keras.models import Sequential -from keras.layers import * -from keras.datasets import cifar10 -from keras.utils import to_categorical -from keras.callbacks import * -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm - - -K.set_image_data_format('channels_first') - -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test - -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) - - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - - -mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) -std = np.std(X_train, axis=(0, 2, 3), keepdims=True) - -X_train = (X_train - mean) / (std + 1e-9) -X_test = (X_test - mean) / (std + 1e-9) - -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) - - -def get_mobilenet(alpha=1, depth_multiplier=1): - model = Sequential() - - def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): - channel_axis = 1 - filters = int(filters * alpha) - model.add(Conv2D(filters, kernel, - padding='same', - use_bias=False, - strides=strides, - input_shape=(3, 32, 32))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): - channel_axis = 1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - model.add(DepthwiseConv2D((3, 3), - padding='same', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False)) - model.add(BatchNormalization(axis=channel_axis)) - 
model.add(Activation('relu')) - model.add(Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - - _conv_block(32, alpha, strides=(1, 1)) - - _depthwise_conv_block(64, alpha, depth_multiplier) - - _depthwise_conv_block(128, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(128, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(256, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(256, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(512, alpha, depth_multiplier, - strides=(2, 2)) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# _depthwise_conv_block(512, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - -# _depthwise_conv_block(1024, alpha, depth_multiplier, -# strides=(2, 2)) -# _depthwise_conv_block(1024, alpha, depth_multiplier) -# model.add(Dropout(rate=0.5)) - - model.add(AveragePooling2D(pool_size=2)) - model.add(Flatten()) - model.add(Dense(10, activation='softmax')) - - return model - - -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.2, - height_shift_range=0.2, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenet() - -learning_rates=[] -for i in range(5): - learning_rates.append(5e-2) -for i in range(50-5): - learning_rates.append(2e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(250-200): - 
learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - #epochs=300, - epochs=250, - callbacks=callbacks -) - -model.summary() - -translate_to_approxhpvm(model, "data/mobilenet_shallow/", X_test, test_labels, 10) - diff --git a/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py b/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py deleted file mode 100644 index 2fbed4623d0e57d7a0dd948fa0894127fea72324..0000000000000000000000000000000000000000 --- a/hpvm/projects/keras/src/legacy/mobilenetv2_cifar10.py +++ /dev/null @@ -1,176 +0,0 @@ -import sys -import os -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -from keras.models import Sequential -from keras.layers import * -from keras.datasets import cifar10 -from keras.utils import to_categorical -from keras.callbacks import * -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K - - -K.set_image_data_format('channels_first') - -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test - -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) - - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - -mean = np.mean(X_train, axis=(0, 1, 2), keepdims=True) -std = np.std(X_train, axis=(0, 1, 2), keepdims=True) -X_train = (X_train - mean) / (std + 1e-9) -X_test = (X_test - mean) / (std + 1e-9) - -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = 
divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - -def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): - channel_axis = 1 - - in_channels = inputs.shape[1] - pointwise_conv_filters = int(filters * alpha) - pointwise_filters = _make_divisible(pointwise_conv_filters, 8) - x = inputs - - if block_id: - x = Conv2D(int(expansion * in_channels), kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - if stride == 2: - x = ZeroPadding2D(padding=(1, 1))(x) - else: - x = ZeroPadding2D(padding=(1, 1))(x) - - x = DepthwiseConv2D(kernel_size=3, strides=stride, use_bias=False, padding='valid')(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(pointwise_filters, kernel_size=1, strides=1, padding='valid', use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - - - if in_channels == pointwise_filters and stride == 1: - return Add()([inputs, x]) - return x - -def get_mobilenetv2(alpha=1.0, depth_multiplier=1): - - channel_axis = 1 - - first_block_filters = _make_divisible(32 * alpha, 8) - img_input = Input(shape=(3, 32, 32)) - - x = ZeroPadding2D(padding=(1, 1))(img_input) - x = Conv2D(first_block_filters, kernel_size=3, strides=1, padding='valid', use_bias=False)(x) - #x = BatchNormalization(axis=channel_axis)(x) - #x = Activation('relu')(x) - - x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, expansion=1, block_id=0 ) - - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=1 ) - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, expansion=6, block_id=2 ) - - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, expansion=6, block_id=3 ) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, 
expansion=6, block_id=4 ) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, expansion=6, block_id=5 ) - - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, expansion=6, block_id=6 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=7 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=8 ) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, expansion=6, block_id=9 ) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=10) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=11) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, expansion=6, block_id=12) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, expansion=6, block_id=13) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=14) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, expansion=6, block_id=15) - x = Dropout(rate=0.25)(x) - - x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, expansion=6, block_id=16) - x = Dropout(rate=0.25)(x) - - if alpha > 1.0: - last_block_filters = _make_divisible(1280 * alpha, 8) - else: - last_block_filters = 1280 - - x = Conv2D(last_block_filters, kernel_size=1, use_bias=False)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - x = AveragePooling2D()(x) - x = Flatten()(x) - x = Dense(10, activation='softmax')(x) - - model = Model(inputs=img_input, outputs=x) - return model - - -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.0, - height_shift_range=0.0, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenetv2() - -learning_rates=[] -for i in range(5): - 
learning_rates.append(2e-2) -for i in range(50-5): - learning_rates.append(1e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(300-200): - learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - epochs=300, - callbacks=callbacks -) - diff --git a/hpvm/projects/keras/src/lenet.py b/hpvm/projects/keras/src/lenet.py index 4cfee4f12a961a0885af1b2e07563e21d097623b..01c84719e6b90d317f7e0dce012577b08b33fcbf 100644 --- a/hpvm/projects/keras/src/lenet.py +++ b/hpvm/projects/keras/src/lenet.py @@ -1,95 +1,115 @@ - +import os import sys +import glob + +import numpy as np +import tensorflow as tf +import scipy +import scipy.io import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten, Activation -from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D, GlobalMaxPooling2D +from keras.models import Model, Sequential +from keras.layers import * +from keras.optimizers import Adam +from keras import regularizers from keras import backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler +from keras.datasets import mnist +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR -batch_size = 128 -num_classes = 10 -# input image dimensions -img_rows, img_cols = 28, 28 +class LeNet_MNIST(Benchmark): + def 
buildModel(self): -if __name__ == "__main__": + # Network Compostion: 2 Conv Layers, 2 Dense Layers + model = Sequential() - # Changing Keras data format to NCHW - NHWC is default - # NOTE: ApproxHPVM requires NCHW format - K.set_image_data_format('channels_first') + # ConvLayer1 + model.add(Conv2D(32, kernel_size=(5, 5), padding='same', activation='tanh', input_shape=(1, 28, 28))) + model.add(MaxPooling2D(pool_size=(2, 2))) - # Loads Mnist dataset - (x_train, y_train), (x_test, y_test) = mnist.load_data() - test_labels = y_test + # ConvLayer2 + model.add(Conv2D(64, (5, 5), activation='tanh', padding='same')) + model.add(MaxPooling2D(pool_size=(2, 2))) - # Reshaping data to be NCHW format - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) + model.add(Flatten()) + + # DenseLayer1 + model.add(Dense(1024, activation='tanh')) + # DenseLayer2 + + model.add(Dense(self.num_classes, activation='tanh')) + # Softmax Layer + model.add(Activation('softmax')) + return model - # Data Normalization - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - - # convert class vectors to binary class matrices - required by Keras - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) + def data_preprocess(self): + (X_train, y_train), (X_val, y_val) = mnist.load_data() + test_labels = y_val + X_train = X_train.reshape(X_train.shape[0], 1, 28, 28) + X_train = X_train.astype('float32') + X_train /= 255 + X_test = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/test_input.bin', dtype=np.float32) + X_test = X_test.reshape((-1, 1, 28, 28)) + y_test = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/test_labels.bin', dtype=np.uint32) + + X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/tune_input.bin', dtype=np.float32) + X_tuner = X_tuner.reshape((-1, 1, 
28, 28)) + y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/lenet_mnist/tune_labels.bin', dtype=np.uint32) - # Network Compostion: 3 Conv Layers, 2 Dense Layers - model = Sequential() + return X_train, y_train, X_test, y_test, X_tuner, y_tuner + - # ConvLayer1 - model.add(Conv2D(32, kernel_size=(5, 5), - activation='tanh', - padding = 'same', - input_shape=input_shape)) - model.add(MaxPooling2D(pool_size=(2, 2))) + def trainModel(self, model, X_train, y_train, X_test, y_test): + + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + + model.compile( + loss='categorical_crossentropy', + optimizer=keras.optimizers.Adadelta(), + metrics=['accuracy'] + ) + + model.fit( + X_train, + y_train, + batch_size=128, + epochs=10, + verbose=1, + validation_data=(X_test, y_test) + ) + + return model - # ConvLayer2 - model.add(Conv2D(64, (5, 5), activation='tanh', padding = 'same')) - model.add(MaxPooling2D(pool_size=(2, 2))) - - - model.add(Flatten()) - # DenseLayer1 - model.add(Dense(1024, activation='relu')) - # DenseLayer2 - model.add(Dense(num_classes, activation='relu')) - # Softmax Layer - model.add(Activation('softmax')) - - - # Configures model for training - model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adadelta(), - metrics=['accuracy']) - - # Training - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=5, - verbose=1, - validation_data=(x_test, y_test)) - - - # Inference - score = model.evaluate(x_test, y_test, verbose=0) - print('Test loss:', score[0]) - print('Test accuracy:', score[1]) +if __name__ == '__main__': + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + K.set_image_data_format('channels_first') - # NOTE: Call to ApproxHPVM Translator - Dumps weights and ApproxHPVM C src - translate_to_approxhpvm(model, "data/lenet_standard/", x_test, test_labels, 10) + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + 
'/lenet_mnist/' + keras_model_file = MODEL_PARAMS_DIR + '/lenet_mnist/weights.h5' + data_dir = '/lenet_mnist/' + src_dir = 'data/lenet_mnist_src/' + num_classes = 10 + batch_size = 500 + + print (reload_dir) + + model = LeNet_MNIST('LeNet_MNIST', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) + + model.run(sys.argv) diff --git a/hpvm/projects/keras/src/mobilenet_cifar10.py b/hpvm/projects/keras/src/mobilenet_cifar10.py index b739ed819634f30f4b33173443ac41f848f9c8f1..367a4dfc6244228b7b1336d1a63044273cebd2fb 100644 --- a/hpvm/projects/keras/src/mobilenet_cifar10.py +++ b/hpvm/projects/keras/src/mobilenet_cifar10.py @@ -1,161 +1,188 @@ - -import sys import os -os.environ['CUDA_VISIBLE_DEVICES'] = '1' - -from keras.models import Sequential +import sys +import glob + +import numpy as np +import tensorflow as tf +import scipy +import scipy.io +import keras +from keras.models import Model, Sequential from keras.layers import * -from keras.datasets import cifar10 +from keras.optimizers import Adam +from keras import regularizers +from keras import backend as K from keras.utils import to_categorical -from keras.callbacks import * from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras import optimizers -import keras.backend as K -from frontend.approxhpvm_translator import translate_to_approxhpvm +from keras.callbacks import LearningRateScheduler +from keras.datasets import cifar10 +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR -K.set_image_data_format('channels_first') -(X_train, y_train), (X_test, y_test) = cifar10.load_data() -test_labels = y_test -print ("X_train.shape = ", X_train.shape) -print ("X_test.shape = ", X_test.shape) +class MobileNet_CIFAR10(Benchmark): + def buildModel(self): + alpha=1 + depth_multiplier=1 -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') + model = Sequential() + def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): 
+ channel_axis = 1 -mean = np.mean(X_train, axis=(0, 2, 3), keepdims=True) -std = np.std(X_train, axis=(0, 2, 3), keepdims=True) + model.add(Conv2D(filters, kernel, + padding='same', + use_bias=False, + strides=strides, + input_shape=(3, 32, 32))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) -X_train = (X_train - mean) / (std + 1e-9) -X_test = (X_test - mean) / (std + 1e-9) + def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): + channel_axis = 1 -y_train = to_categorical(y_train, num_classes=10) -y_test = to_categorical(y_test, num_classes=10) + model.add(ZeroPadding2D(padding=((1,1), (1,1)))) + model.add(DepthwiseConv2D((3, 3), + padding='valid', + #depth_multiplier=depth_multiplier, + strides=strides, + use_bias=False)) + model.add(BatchNormalization(axis=channel_axis)) -def get_mobilenet(alpha=1, depth_multiplier=1): - model = Sequential() - - def _conv_block(filters, alpha, kernel=(3, 3), strides=(1, 1)): - channel_axis = 1 - - model.add(Conv2D(filters, kernel, - padding='same', - use_bias=False, - strides=strides, - input_shape=(3, 32, 32))) - model.add(BatchNormalization(axis=channel_axis)) - model.add(Activation('relu')) - - def _depthwise_conv_block(pointwise_conv_filters, alpha, depth_multiplier=1, strides=(1, 1)): - channel_axis = 1 + model.add(Activation('relu')) + model.add(Conv2D(pointwise_conv_filters, (1, 1), + padding='same', + use_bias=False, + strides=(1, 1))) + model.add(BatchNormalization(axis=channel_axis)) + model.add(Activation('relu')) - model.add(ZeroPadding2D(padding = ((1,1), (1,1) ))) - model.add(DepthwiseConv2D((3, 3), - padding='valid', - #depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False)) - model.add(BatchNormalization(axis=channel_axis)) - - model.add(Activation('relu')) - model.add(Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1))) - model.add(BatchNormalization(axis=channel_axis)) - 
model.add(Activation('relu')) + _conv_block(32, alpha, strides=(1, 1)) + _depthwise_conv_block(64, alpha, depth_multiplier) + + _depthwise_conv_block(128, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(128, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(256, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(256, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(512, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + _depthwise_conv_block(512, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + _depthwise_conv_block(1024, alpha, depth_multiplier, + strides=(2, 2)) + _depthwise_conv_block(1024, alpha, depth_multiplier) + model.add(Dropout(rate=0.5)) + + model.add(AveragePooling2D(pool_size=2)) + model.add(Flatten()) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) + + return model - _conv_block(32, alpha, strides=(1, 1)) - - _depthwise_conv_block(64, alpha, depth_multiplier) - - _depthwise_conv_block(128, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(128, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(256, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(256, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - - _depthwise_conv_block(512, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - _depthwise_conv_block(512, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) + def 
data_preprocess(self): + + (X_train, y_train), (X_val, y_val) = cifar10.load_data() + + X_train = X_train / 255.0 + X_val = X_val / 255.0 + + mean = np.mean(X_train) + std = np.std(X_train) + X_train = (X_train - mean) / (std + 1e-7) + X_val = (X_val - mean) / (std + 1e-7) + + X_test = X_val[0:5000] + y_test = y_val[0:5000] + X_tuner = X_val[5000:] + y_tuner = y_val[5000:] + + return X_train, y_train, X_test, y_test, X_tuner, y_tuner - _depthwise_conv_block(1024, alpha, depth_multiplier, - strides=(2, 2)) - _depthwise_conv_block(1024, alpha, depth_multiplier) - model.add(Dropout(rate=0.5)) - model.add(AveragePooling2D(pool_size=2)) - model.add(Flatten()) - model.add(Dense(10, activation='softmax')) + def trainModel(self, model, X_train, y_train, X_test, y_test): - return model + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + + # data augmentation, horizontal flips only + datagen = ImageDataGenerator( + featurewise_center=False, + featurewise_std_normalization=False, + rotation_range=0.0, + width_shift_range=0.0, + height_shift_range=0.0, + vertical_flip=False, + horizontal_flip=True) + datagen.fit(X_train) + + + learning_rates=[] + for i in range(50): + learning_rates.append(0.01) + for i in range(75-50): + learning_rates.append(0.001) + for i in range(100-75): + learning_rates.append(0.0001) + for i in range(125-100): + learning_rates.append(0.00001) + + callbacks = [ + LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) + ] + + model.compile(optimizer=keras.optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0), + loss='categorical_crossentropy', + metrics=['accuracy']) + + model.fit_generator( + datagen.flow(X_train, y_train, batch_size=128), + steps_per_epoch=int(np.ceil(50000 / 128)), + validation_data=(X_test, y_test), + epochs=125, + callbacks=callbacks + ) + + return model + + +if __name__ == '__main__': + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + 
K.set_image_data_format('channels_first') + + + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + '/mobilenet_cifar10/' + keras_model_file = MODEL_PARAMS_DIR + '/mobilenet_cifar10/weights.h5' + data_dir = '/mobilenet_cifar10/' + src_dir = 'data/mobilenet_cifar10_src/' + num_classes = 10 + batch_size = 500 + + model = MobileNet_CIFAR10('MobileNet_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) -# data augmentation, horizontal flips only -datagen = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=0.0, - width_shift_range=0.0, - height_shift_range=0.0, - vertical_flip=False, - horizontal_flip=True) -datagen.fit(X_train) - - -model = get_mobilenet() - -learning_rates=[] -for i in range(5): - learning_rates.append(2e-2) -for i in range(50-5): - learning_rates.append(1e-2) -for i in range(100-50): - learning_rates.append(8e-3) -for i in range(150-100): - learning_rates.append(4e-3) -for i in range(200-150): - learning_rates.append(2e-3) -for i in range(300-200): - learning_rates.append(1e-3) - -callbacks = [ - LearningRateScheduler(lambda epoch: float(learning_rates[epoch])) -] - -model.compile(optimizer=optimizers.SGD(lr=learning_rates[0], momentum=0.9, decay=0.0, nesterov=False), - loss='categorical_crossentropy', - metrics=['accuracy']) - -model.fit_generator( - datagen.flow(X_train, y_train, batch_size=128), - steps_per_epoch=int(np.ceil(50000 / 128)), - validation_data=(X_test, y_test), - #epochs=300, - epochs=50, - callbacks=callbacks -) - -model.summary() - -translate_to_approxhpvm(model, "data/mobilenet_hpvm/", X_test, test_labels, 10) + model.run(sys.argv) diff --git a/hpvm/projects/keras/src/resnet18_cifar10.py b/hpvm/projects/keras/src/resnet18_cifar10.py index 6afa1c50fa470d038577ff8c3c4b5df43d9bab6b..74abc7ad9f860963c770aaa6bea27b7c16d59738 100644 --- a/hpvm/projects/keras/src/resnet18_cifar10.py +++ 
b/hpvm/projects/keras/src/resnet18_cifar10.py @@ -33,42 +33,34 @@ ResNet164 v2| 18| - %| 94.54 %| - ResNet1001 v2|111| - %| 95.08+-.14 %| - """ -from __future__ import print_function -import keras -from keras.layers import Dense, Conv2D, BatchNormalization, Activation -from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D -from keras.optimizers import Adam -from keras.callbacks import ModelCheckpoint, LearningRateScheduler -from keras.callbacks import ReduceLROnPlateau -from keras.preprocessing.image import ImageDataGenerator -from keras.regularizers import l2 -from keras import backend as K -from keras.models import Model -from keras.datasets import cifar10 -from keras import backend as K -import numpy as np import os import sys -from approxhpvm_translator import translate_to_approxhpvm -from weight_utils import dumpCalibrationData - +import glob +import numpy as np +import tensorflow as tf +import scipy +import scipy.io +import keras +from keras.models import Model, Sequential +from keras.layers import * +from keras.optimizers import Adam +from keras import regularizers +from keras import backend as K +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - +from keras.datasets import cifar10 +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR -K.set_image_data_format('channels_first') # Training parameters batch_size = 32 # orig paper trained all networks with batch_size=128 -#---- epochs = 200 -epochs = 2 -data_augmentation = True -num_classes = 10 +epochs = 200 -# Subtracting pixel mean improves accuracy -subtract_pixel_mean = True # Model parameter # ---------------------------------------------------------------------------- @@ -99,63 +91,8 @@ elif version == 2: # Model name, depth and version model_type = 'ResNet%dv%d' % (depth, version) -# Load the CIFAR10 data. 
-(x_train, y_train), (x_test, y_test) = cifar10.load_data() -test_labels = y_test -train_labels = y_train - -# Input image dimensions. -input_shape = x_train.shape[1:] - -# Normalize data. -x_train = x_train.astype('float32') / 255 -x_test = x_test.astype('float32') / 255 - -# If subtract pixel mean is enabled -if subtract_pixel_mean: - x_train_mean = np.mean(x_train, axis=0) - x_train -= x_train_mean - x_test -= x_train_mean - -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') -print('y_train shape:', y_train.shape) - -# Convert class vectors to binary class matrices. -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - - - - - - -def lr_schedule(epoch): - """Learning Rate Schedule - - Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. - Called automatically every epoch as part of callbacks during training. - - # Arguments - epoch (int): The number of epochs - - # Returns - lr (float32): learning rate - """ - lr = 1e-3 - if epoch > 180: - lr *= 0.5e-3 - elif epoch > 160: - lr *= 1e-3 - elif epoch > 120: - lr *= 1e-2 - elif epoch > 80: - lr *= 1e-1 - print('Learning rate: ', lr) - return lr - + def resnet_layer(inputs, num_filters=16, kernel_size=3, @@ -183,14 +120,14 @@ def resnet_layer(inputs, strides=strides, padding='valid', # NOTE: using valid convs with explicit pad operation kernel_initializer='he_normal', - kernel_regularizer=l2(1e-4)) + kernel_regularizer=regularizers.l2(1e-4)) padding_value = int((kernel_size - 1) / 2) zero_padding = ZeroPadding2D(padding = (padding_value, padding_value)) # FIXME: Temporarily disabled batch normalization batch_normalization = False - + x = inputs x = zero_padding(x) if conv_first: @@ -208,364 +145,436 @@ def resnet_layer(inputs, return x +class ResNet18_CIFAR10(Benchmark): -def resnet_v0(input_shape, depth, num_classes=10): - """ResNet Version 1 Model builder [a] - 
- Stacks of 2 x (3 x 3) Conv2D-BN-ReLU - Last ReLU is after the shortcut connection. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filters is - doubled. Within each stage, the layers have the same number filters and the - same number of filters. - Features maps sizes: - stage 0: 32x32, 16 - stage 1: 16x16, 32 - stage 2: 8x8, 64 - The Number of parameters is approx the same as Table 6 of [a]: - ResNet20 0.27M - ResNet32 0.46M - ResNet44 0.66M - ResNet56 0.85M - ResNet110 1.7M + def lr_schedule(self, epoch): + """Learning Rate Schedule - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) + Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. + Called automatically every epoch as part of callbacks during training. - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 6 != 0: - raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') - # Start model definition. 
- num_filters = 16 - num_res_blocks = int((depth - 2) / 6) - - inputs = Input(shape=input_shape) - x = resnet_layer(inputs=inputs) - # Instantiate the stack of residual units - for stack in range(3): - for res_block in range(num_res_blocks): - strides = 1 - if stack > 0 and res_block == 0: # first layer but not first stack - strides = 2 # downsample - y = resnet_layer(inputs=x, - num_filters=num_filters, - strides=strides) - y = resnet_layer(inputs=y, - num_filters=num_filters, - activation=None) - if stack > 0 and res_block == 0: # first layer but not first stack - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - x = Activation('relu')(x) - num_filters *= 1 - - # Add classifier on top. - # v1 does not use BN after last shortcut connection-ReLU - #-- x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - x = Dense(64)(y) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(x) - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - - -def resnet_v1_1(input_shape, depth, num_classes=10): - """ResNet Version 1 Model builder [a] - - Stacks of 2 x (3 x 3) Conv2D-BN-ReLU - Last ReLU is after the shortcut connection. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filters is - doubled. Within each stage, the layers have the same number filters and the - same number of filters. 
- Features maps sizes: - stage 0: 32x32, 16 - stage 1: 16x16, 32 - stage 2: 8x8, 64 - The Number of parameters is approx the same as Table 6 of [a]: - ResNet20 0.27M - ResNet32 0.46M - ResNet44 0.66M - ResNet56 0.85M - ResNet110 1.7M + # Arguments + epoch (int): The number of epochs - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) + # Returns + lr (float32): learning rate + """ + lr = 1e-3 + if epoch > 180: + lr *= 0.5e-3 + elif epoch > 160: + lr *= 1e-3 + elif epoch > 120: + lr *= 1e-2 + elif epoch > 80: + lr *= 1e-1 + + return lr + - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 6 != 0: - raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') - # Start model definition. - num_filters = 16 - num_res_blocks = int((depth - 2) / 6) - - inputs = Input(shape=input_shape) - x = resnet_layer(inputs=inputs) - # Instantiate the stack of residual units - for stack in range(3): - for res_block in range(num_res_blocks): - strides = 1 - if stack > 0 and res_block == 0: # first layer but not first stack - strides = 2 # downsample - y = resnet_layer(inputs=x, - num_filters=num_filters, - strides=strides) - y = resnet_layer(inputs=y, - num_filters=num_filters, - activation=None) - if stack > 0 and res_block == 0: # first layer but not first stack - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, + def resnet_v0(self, input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] + + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filters is + doubled. Within each stage, the layers have the same number filters and the + same number of filters. 
+ Features maps sizes: + stage 0: 32x32, 16 + stage 1: 16x16, 32 + stage 2: 8x8, 64 + The Number of parameters is approx the same as Table 6 of [a]: + ResNet20 0.27M + ResNet32 0.46M + ResNet44 0.66M + ResNet56 0.85M + ResNet110 1.7M + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 6 != 0: + raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') + # Start model definition. + num_filters = 16 + num_res_blocks = int((depth - 2) / 6) + + inputs = Input(shape=input_shape) + x = resnet_layer(inputs=inputs) + # Instantiate the stack of residual units + for stack in range(3): + for res_block in range(num_res_blocks): + strides = 1 + if stack > 0 and res_block == 0: # first layer but not first stack + strides = 2 # downsample + y = resnet_layer(inputs=x, num_filters=num_filters, + strides=strides) + y = resnet_layer(inputs=y, + num_filters=num_filters, + activation=None) + if stack > 0 and res_block == 0: # first layer but not first stack + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + x = Activation('relu')(x) + num_filters *= 1 + + # Add classifier on top. + # v1 does not use BN after last shortcut connection-ReLU + #-- x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + x = Dense(64)(y) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(x) + + # Instantiate model. + model = Model(inputs=inputs, outputs=outputs) + return model + + + def resnet_v1_1(self, input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] + + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. 
+ At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filters is + doubled. Within each stage, the layers have the same number filters and the + same number of filters. + Features maps sizes: + stage 0: 32x32, 16 + stage 1: 16x16, 32 + stage 2: 8x8, 64 + The Number of parameters is approx the same as Table 6 of [a]: + ResNet20 0.27M + ResNet32 0.46M + ResNet44 0.66M + ResNet56 0.85M + ResNet110 1.7M + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 6 != 0: + raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') + # Start model definition. + num_filters = 16 + num_res_blocks = int((depth - 2) / 6) + + inputs = Input(shape=input_shape) + x = resnet_layer(inputs=inputs) + # Instantiate the stack of residual units + for stack in range(3): + for res_block in range(num_res_blocks): + strides = 1 + if stack > 0 and res_block == 0: # first layer but not first stack + strides = 2 # downsample + y = resnet_layer(inputs=x, + num_filters=num_filters, + strides=strides) + y = resnet_layer(inputs=y, + num_filters=num_filters, + activation=None) + if stack > 0 and res_block == 0: # first layer but not first stack + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + x = Activation('relu')(x) + num_filters *= 2 + + + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + #activation='softmax', + kernel_initializer='he_normal')(y) + + outputs = Activation('softmax')(outputs) + + + # Instantiate model. 
+ model = Model(inputs=inputs, outputs=outputs) + return model + + + + def resnet_v2(self, input_shape, depth, num_classes=10): + """ResNet Version 2 Model builder [b] + + Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as + bottleneck layer + First shortcut connection per layer is 1 x 1 Conv2D. + Second and onwards shortcut connection is identity. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filter maps is + doubled. Within each stage, the layers have the same number filters and the + same filter map sizes. + Features maps sizes: + conv1 : 32x32, 16 + stage 0: 32x32, 64 + stage 1: 16x16, 128 + stage 2: 8x8, 256 + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 9 != 0: + raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') + # Start model definition. 
+ num_filters_in = 16 + num_res_blocks = int((depth - 2) / 9) + + inputs = Input(shape=input_shape) + # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths + x = resnet_layer(inputs=inputs, + num_filters=num_filters_in, + conv_first=True) + + # Instantiate the stack of residual units + for stage in range(3): + for res_block in range(num_res_blocks): + activation = 'relu' + batch_normalization = True + strides = 1 + if stage == 0: + num_filters_out = num_filters_in * 4 + if res_block == 0: # first layer and first stage + activation = None + batch_normalization = False + else: + num_filters_out = num_filters_in * 2 + if res_block == 0: # first layer but not first stage + strides = 2 # downsample + + # bottleneck residual unit + y = resnet_layer(inputs=x, + num_filters=num_filters_in, kernel_size=1, strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - x = Activation('relu')(x) - num_filters *= 2 + activation=activation, + batch_normalization=batch_normalization, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_in, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_out, + kernel_size=1, + conv_first=False) + if res_block == 0: + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters_out, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + + num_filters_in = num_filters_out + + # Add classifier on top. + # v2 has BN-ReLU before Pooling + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(y) + + # Instantiate model. 
+ model = Model(inputs=inputs, outputs=outputs) + return model + + def buildModel(self): + + depth = 20 + input_shape = (3, 32, 32) - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - #activation='softmax', - kernel_initializer='he_normal')(y) + if version == 2: + model = self.resnet_v2(input_shape=input_shape, depth=depth) + else: + model = self.resnet_v1_1(input_shape=input_shape, depth=depth) - outputs = Activation('softmax')(outputs) + return model + + + def data_preprocess(self): + (X_train, y_train), (X_val, y_val) = cifar10.load_data() - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model + X_train = X_train / 255.0 + X_val = X_val / 255.0 + mean = np.mean(X_train) + std = np.std(X_train) +# X_train = (X_train - mean) / (std + 1e-7) +# X_val = (X_val - mean) / (std + 1e-7) + X_train = (X_train - mean) + X_val = (X_val - mean) -def resnet_v2(input_shape, depth, num_classes=10): - """ResNet Version 2 Model builder [b] + X_test_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_input.bin', dtype=np.float32) + Y_test_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/test_labels.bin', dtype=np.uint32) - Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as - bottleneck layer - First shortcut connection per layer is 1 x 1 Conv2D. - Second and onwards shortcut connection is identity. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filter maps is - doubled. Within each stage, the layers have the same number filters and the - same filter map sizes. 
- Features maps sizes: - conv1 : 32x32, 16 - stage 0: 32x32, 64 - stage 1: 16x16, 128 - stage 2: 8x8, 256 + X_test_val = X_test_val.reshape((-1,3,32,32)) - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') - # Start model definition. - num_filters_in = 16 - num_res_blocks = int((depth - 2) / 9) - - inputs = Input(shape=input_shape) - # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths - x = resnet_layer(inputs=inputs, - num_filters=num_filters_in, - conv_first=True) - - # Instantiate the stack of residual units - for stage in range(3): - for res_block in range(num_res_blocks): - activation = 'relu' - batch_normalization = True - strides = 1 - if stage == 0: - num_filters_out = num_filters_in * 4 - if res_block == 0: # first layer and first stage - activation = None - batch_normalization = False - else: - num_filters_out = num_filters_in * 2 - if res_block == 0: # first layer but not first stage - strides = 2 # downsample - - # bottleneck residual unit - y = resnet_layer(inputs=x, - num_filters=num_filters_in, - kernel_size=1, - strides=strides, - activation=activation, - batch_normalization=batch_normalization, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_in, - conv_first=False) - y = resnet_layer(inputs=y, - num_filters=num_filters_out, - kernel_size=1, - conv_first=False) - if res_block == 0: - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters_out, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) + X_tune_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_input.bin', dtype=np.float32) + 
Y_tune_val = np.fromfile(MODEL_PARAMS_DIR + '/resnet18_cifar10/tune_labels.bin', dtype=np.uint32) + + X_tune_val = X_tune_val.reshape((-1,3,32,32)) + + + X_test = X_test_val[:5000] + y_test= Y_test_val[:5000] - num_filters_in = num_filters_out + X_tuner = X_tune_val[:5000] + y_tuner = Y_tune_val[:5000] - # Add classifier on top. - # v2 has BN-ReLU before Pooling - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model + return X_train, y_train, X_test, y_test, X_tuner, y_tuner -depth = 20 -if version == 2: - model = resnet_v2(input_shape=input_shape, depth=depth) -else: - model = resnet_v1_1(input_shape=input_shape, depth=depth) + def trainModel(self, model, X_train, y_train, X_test, y_test): + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + + model.compile( + loss='categorical_crossentropy', + optimizer=Adam(lr=self.lr_schedule(0)), + metrics=['accuracy'] + ) + + + lr_scheduler = LearningRateScheduler(self.lr_schedule) + + lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), + cooldown=0, + patience=5, + min_lr=0.5e-6) + + callbacks = [lr_reducer, lr_scheduler] + + # Run training, with or without data augmentation. 
+ if not data_augmentation: + print('Not using data augmentation.') + model.fit(X_train, y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(X_test, y_test), + shuffle=True, + callbacks=callbacks) + else: + print('Using real-time data augmentation.') + # This will do preprocessing and realtime data augmentation: + datagen = ImageDataGenerator( + # set input mean to 0 over the dataset + featurewise_center=False, + # set each sample mean to 0 + samplewise_center=False, + # divide inputs by std of dataset + featurewise_std_normalization=False, + # divide each input by its std + samplewise_std_normalization=False, + # apply ZCA whitening + zca_whitening=False, + # epsilon for ZCA whitening + zca_epsilon=1e-06, + # randomly rotate images in the range (deg 0 to 180) + rotation_range=0, + # randomly shift images horizontally + width_shift_range=0.1, + # randomly shift images vertically + height_shift_range=0.1, + # set range for random shear + shear_range=0., + # set range for random zoom + zoom_range=0., + # set range for random channel shifts + channel_shift_range=0., + # set mode for filling points outside the input boundaries + fill_mode='nearest', + # value used for fill_mode = "constant" + cval=0., + # randomly flip images + horizontal_flip=True, + # randomly flip images + vertical_flip=False, + # set rescaling factor (applied before any other transformation) + rescale=None, + # set function that will be applied on each input + preprocessing_function=None, + # image data format, either "channels_first" or "channels_last" + data_format="channels_first", + # fraction of images reserved for validation (strictly between 0 and 1) + validation_split=0.0) + + # Compute quantities required for featurewise normalization + # (std, mean, and principal components if ZCA whitening is applied). + datagen.fit(X_train) + + # Fit the model on the batches generated by datagen.flow(). 
+ model.fit_generator(datagen.flow(X_train, y_train, batch_size=batch_size), + validation_data=(X_test, y_test), + epochs=epochs, verbose=1, workers=4, + callbacks=callbacks) + + return model + + -model.compile(loss='categorical_crossentropy', - optimizer=Adam(lr=lr_schedule(0)), - metrics=['accuracy']) -model.summary() -print(model_type) - -# Prepare model model saving directory. -save_dir = os.path.join(os.getcwd(), 'saved_models') -model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type -if not os.path.isdir(save_dir): - os.makedirs(save_dir) -filepath = os.path.join(save_dir, model_name) - -# Prepare callbacks for model saving and for learning rate adjustment. -checkpoint = ModelCheckpoint(filepath=filepath, - monitor='val_acc', - verbose=1, - save_best_only=True) - -lr_scheduler = LearningRateScheduler(lr_schedule) - -lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), - cooldown=0, - patience=5, - min_lr=0.5e-6) - -callbacks = [checkpoint, lr_reducer, lr_scheduler] - -# Run training, with or without data augmentation. 
-if not data_augmentation: - print('Not using data augmentation.') - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - # set input mean to 0 over the dataset - featurewise_center=False, - # set each sample mean to 0 - samplewise_center=False, - # divide inputs by std of dataset - featurewise_std_normalization=False, - # divide each input by its std - samplewise_std_normalization=False, - # apply ZCA whitening - zca_whitening=False, - # epsilon for ZCA whitening - zca_epsilon=1e-06, - # randomly rotate images in the range (deg 0 to 180) - rotation_range=0, - # randomly shift images horizontally - width_shift_range=0.1, - # randomly shift images vertically - height_shift_range=0.1, - # set range for random shear - shear_range=0., - # set range for random zoom - zoom_range=0., - # set range for random channel shifts - channel_shift_range=0., - # set mode for filling points outside the input boundaries - fill_mode='nearest', - # value used for fill_mode = "constant" - cval=0., - # randomly flip images - horizontal_flip=True, - # randomly flip images - vertical_flip=False, - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format="channels_first", - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). 
- model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), - validation_data=(x_test, y_test), - epochs=epochs, verbose=1, workers=4, - callbacks=callbacks) - -# Score trained model. -scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) - - -dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train, - "calibration_data/resnet18_train_labels.bin", train_labels) -sys.exit(0) +if __name__ == '__main__': + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + K.set_image_data_format('channels_first') + + + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + '/resnet18_cifar10/' + keras_model_file = MODEL_PARAMS_DIR + '/resnet18_cifar10/weights.h5' + data_dir = '/resnet18_cifar10/' + src_dir = 'data/resnet18_cifar10_src/' + num_classes = 10 + batch_size = 500 + + model = ResNet18_CIFAR10('ResNet18_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) -#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels) + model.run(sys.argv) -translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test) diff --git a/hpvm/projects/keras/src/resnet50_imagenet.py b/hpvm/projects/keras/src/resnet50_imagenet.py index 55d0918b7a526ba1a9866d2d8e3b0e2e8608cc25..0c3006213d7880f6133e1f8030256d50d25ea35d 100644 --- a/hpvm/projects/keras/src/resnet50_imagenet.py +++ b/hpvm/projects/keras/src/resnet50_imagenet.py @@ -1,289 +1,155 @@ import os +import sys import glob -import random -import scipy -import scipy.io -import cv2 import numpy as np - import tensorflow as tf +import scipy +import scipy.io import keras -from keras.models import Sequential, Model +from keras.models import Model, Sequential from keras.layers import * -from keras.utils import to_categorical -from keras.applications.resnet50 import ResNet50, preprocess_input +from keras.optimizers import Adam +from 
keras import regularizers from keras import backend as K +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData2 - - -np.random.seed(2020) - -os.environ["CUDA_VISIBLE_DEVICES"] = "1" - -K.set_image_data_format('channels_first') - -data_format = 'channels_first' - - -IMAGENET_DIR = '/home/nz11/ILSVRC2012/' -OUTPUT_DIR = 'data/resnet50_imagenet_tune_regenerate/' -WEIGHTS_PATH = 'data/resnet50_imagenet/weights.h5' - -NUM_CLASSES = 200 -IMAGES_PER_CLASS = 40 -# VAL_SIZE = 100 - - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - filters1, filters2, filters3 = filters - bn_axis = 1 - - x = Conv2D(filters1, (1, 1))(input_tensor) - x = BatchNormalization(axis=bn_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(filters2, kernel_size, - padding='same')(x) - x = BatchNormalization(axis=bn_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(filters3, (1, 1))(x) - x = BatchNormalization(axis=bn_axis)(x) - - x = add([x, input_tensor]) - x = Activation('relu')(x) - return x - - -def conv_block(input_tensor, - kernel_size, - filters, - stage, - block, - strides=(2, 2)): - filters1, filters2, filters3 = filters - bn_axis = 1 - x = Conv2D(filters1, (1, 1), strides=strides)(input_tensor) - x = BatchNormalization(axis=bn_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(filters2, kernel_size, padding='same')(x) - x = BatchNormalization(axis=bn_axis)(x) - x = Activation('relu')(x) - - x = Conv2D(filters3, (1, 1))(x) - x = BatchNormalization(axis=bn_axis)(x) - - shortcut = Conv2D(filters3, (1, 1), strides=strides)(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis)(shortcut) - - x = add([x, shortcut]) - x = Activation('relu')(x) - return x - - -def get_resnet50_nchw_keras(): - - img_input = Input(shape=(3, 224, 224)) - bn_axis = 1 - 
- x = ZeroPadding2D((3, 3))(img_input) - x = Conv2D(64, (7, 7), strides=(2, 2))(x) -# x = BatchNormalization(axis=bn_axis)(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = BatchNormalization(axis=bn_axis)(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - x = AveragePooling2D((7, 7))(x) - x = Flatten()(x) - x = Dense(1000)(x) - x = Activation('softmax')(x) - - model = Model(img_input, x) - - - original_model = ResNet50() - - for i in range(len(original_model.layers)): - try: - model.layers[i].set_weights(original_model.layers[i].get_weights()) -# model.layers[i].trainable = False - except: - print (i, 'skipped') - model.layers[5].set_weights(original_model.layers[3].get_weights()) +class ResNet50(Benchmark): - - return model - - -def load_image(x): - - image = cv2.imread(x) + def buildModel(self): - height, width, _ = image.shape - new_height = height * 256 // min(image.shape[:2]) - new_width = 
width * 256 // min(image.shape[:2]) - image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC) - - height, width, _ = image.shape - startx = width // 2 - (224 // 2) - starty = height // 2 - (224 // 2) - image = image[starty:starty + 224, startx:startx + 224] - - image = image[:, :, ::-1] - image = np.transpose(image, (2, 0, 1)) - image = preprocess_input(image.astype(np.float32), data_format=data_format) + def identity_block(input_tensor, kernel_size, filters, stage, block): + filters1, filters2, filters3 = filters + bn_axis = 1 + + x = Conv2D(filters1, (1, 1))(input_tensor) + x = BatchNormalization(axis=bn_axis)(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size, + padding='same')(x) + x = BatchNormalization(axis=bn_axis)(x) + x = Activation('relu')(x) + + x = Conv2D(filters3, (1, 1))(x) + x = BatchNormalization(axis=bn_axis)(x) + + x = add([x, input_tensor]) + x = Activation('relu')(x) + return x + + def conv_block(input_tensor, + kernel_size, + filters, + stage, + block, + strides=(2, 2)): + filters1, filters2, filters3 = filters + bn_axis = 1 + x = Conv2D(filters1, (1, 1), strides=strides)(input_tensor) + x = BatchNormalization(axis=bn_axis)(x) + x = Activation('relu')(x) + + x = Conv2D(filters2, kernel_size, padding='same')(x) + x = BatchNormalization(axis=bn_axis)(x) + x = Activation('relu')(x) + + x = Conv2D(filters3, (1, 1))(x) + x = BatchNormalization(axis=bn_axis)(x) + + shortcut = Conv2D(filters3, (1, 1), strides=strides)(input_tensor) + shortcut = BatchNormalization( + axis=bn_axis)(shortcut) + + x = add([x, shortcut]) + x = Activation('relu')(x) + return x + + img_input = Input(shape=(3, 224, 224)) + bn_axis = 1 + + x = ZeroPadding2D((3, 3))(img_input) + x = Conv2D(64, (7, 7), strides=(2, 2))(x) + # x = BatchNormalization(axis=bn_axis)(x) + x = Activation('relu')(x) + x = MaxPooling2D((3, 3), strides=(2, 2))(x) + x = BatchNormalization(axis=bn_axis)(x) + + x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', 
strides=(1, 1)) + x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') + x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') + + x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') + x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') + + x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') + x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') + + x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') + x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') + + x = AveragePooling2D((7, 7))(x) + x = Flatten()(x) + x = Dense(1000)(x) + x = Activation('softmax')(x) + + model = Model(img_input, x) - return image.astype(np.float32) - - -meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat') -original_idx_to_synset = {} -synset_to_name = {} - -for i in range(1000): - ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0]) - synset = meta['synsets'][i,0][1][0] - name = meta['synsets'][i,0][2][0] - original_idx_to_synset[ilsvrc2012_id] = synset - synset_to_name[synset] = name - -synset_to_keras_idx = {} -keras_idx_to_name = {} -f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r') -c = 0 -for line in f: - parts = line.split(' ') - synset_to_keras_idx[parts[0]] = c - keras_idx_to_name[c] = ' '.join(parts[1:]) - c += 1 -f.close() - + return model - - -model = get_resnet50_nchw_keras() - -X_tune, X_test = [], [] -y_tune, y_true = [], [] - -classes = glob.glob(IMAGENET_DIR + 'val/*') - -for c in 
np.random.permutation(len(classes))[:NUM_CLASSES]: - x = glob.glob(classes[c] + '/*') - x = np.array(x) - - idx = np.random.permutation(len(x)) - idx = idx[:max(len(idx), IMAGES_PER_CLASS)] - - synset = classes[c].split('/')[-1] - images = list(map(lambda x : load_image(x), x[idx])) - labels = [synset_to_keras_idx[synset]] * len(x[idx]) - X_test += images[:IMAGES_PER_CLASS // 2] - y_true += labels[:IMAGES_PER_CLASS // 2] - - X_tune += images[IMAGES_PER_CLASS // 2:] - y_tune += labels[IMAGES_PER_CLASS // 2:] - - -X_test = np.array(X_test) -y_true = np.array(y_true) -X_tune = np.array(X_tune) -y_tune = np.array(y_tune) - -print ('tune size', len(X_tune)) -print ('test size', len(X_test)) - - - - - - -def train_helper(x): - - try: - x = x.decode('utf-8') - except: - pass - - image = load_image(x) - - y = np.zeros(1000, dtype=np.uint8) + def data_preprocess(self): + X_train, y_train = None, None - y[synset_to_keras_idx[x.split('/')[-2]]]= 1 + X_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_input.bin', dtype=np.float32) + X_test = X_test.reshape((-1, 3, 224, 224)) + y_test = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/test_labels.bin', dtype=np.uint32) - return image, y - - - -train_images = glob.glob(IMAGENET_DIR + 'train/*/*') -random.shuffle(train_images) - -dataset = tf.data.Dataset().from_tensor_slices(train_images) -dataset = dataset.map( - lambda x : tf.py_func(train_helper, [x], [tf.float32, tf.uint8]), - num_parallel_calls=16 -) + X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/tune_input.bin', dtype=np.float32) + X_tuner = X_tuner.reshape((-1, 3, 224, 224)) + y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/resnet50_imagenet/tune_labels.bin', dtype=np.uint32) + + return X_train, y_train, X_test, y_test, X_tuner, y_tuner + -dataset = dataset.shuffle(buffer_size=1000) -dataset = dataset.batch(64) -dataset = dataset.repeat() + def trainModel(self, model): -next_element = dataset.make_one_shot_iterator().get_next() + assert False, 
"ImageNet training not supported - use Pretrained weights" -sess = tf.Session() -def generate(): - while True: - yield sess.run(next_element) +if __name__ == '__main__': + + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + K.set_image_data_format('channels_first') -model.compile(optimizer=keras.optimizers.Adam(lr=0.00001), loss='categorical_crossentropy', metrics=['acc']) + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + '/resnet50_imagenet/' + keras_model_file = MODEL_PARAMS_DIR + '/resnet50_imagenet/weights.h5' + data_dir = '/resnet50_imagenet/' + src_dir = 'data/resnet50_imagenet_src/' + num_classes = 1000 + batch_size = 50 -if os.path.exists(WEIGHTS_PATH): - model.load_weights(WEIGHTS_PATH) -else: - pass -# model.fit_generator(generate(), steps_per_epoch=1000, validation_data=(X_test, to_categorical(y_true, num_classes=1000)), epochs=6) -# model.save_weights(OUTPUT_DIR + 'weights.h5') - -translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000) - -# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true) -# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune) -# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true) + model = ResNet50('ResNet50_imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) + + model.run(sys.argv) -pred = np.argmax(model.predict(X_test), axis=1) -print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test)) - -# pred = np.argmax(model.predict(X_tune), axis=1) -# print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune)) \ No newline at end of file diff --git a/hpvm/projects/keras/src/vgg16_cifar10.py b/hpvm/projects/keras/src/vgg16_cifar10.py index df1bcc6b0f414a8ba7cba8911e0d87ff0fbcd951..873e23b766ffbd58c1d5db89141da60fee88126e 100644 --- a/hpvm/projects/keras/src/vgg16_cifar10.py +++ 
b/hpvm/projects/keras/src/vgg16_cifar10.py @@ -1,34 +1,34 @@ +import os +import sys +import glob - -from __future__ import print_function -import keras -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D, BatchNormalization -from keras import optimizers import numpy as np -from keras.layers.core import Lambda -from keras import backend as K +import tensorflow as tf +import scipy +import scipy.io +import keras +from keras.models import Model, Sequential +from keras.layers import * +from keras.optimizers import Adam from keras import regularizers -import os -import sys +from keras import backend as K +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler + +from keras.datasets import cifar10 from Benchmark import Benchmark -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData +from Config import MODEL_PARAMS_DIR class VGG16_CIFAR10(Benchmark): - - def buildModel(self): # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper. self.weight_decay = 0.0005 - self.x_shape = [3,32,32] + self.x_shape = [3, 32, 32] model = Sequential() weight_decay = self.weight_decay @@ -97,55 +97,40 @@ class VGG16_CIFAR10(Benchmark): model.add(Activation('softmax')) return model - - def normalize(self,X_train,X_test): - #this function normalize inputs for zero mean and unit variance - # it is used when training a model. - # Input: training set and test set - # Output: normalized training set and test set according to the trianing set statistics. 
- mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train, axis=(0, 1, 2, 3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) - return X_train, X_test - - + def data_preprocess(self): - (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() - #X_train = X_train / 255.0 - #X_test = X_test / 255.0 + (X_train, y_train), (X_val, y_val) = cifar10.load_data() - mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train,axis=(0,1,2,3)) - X_train = (X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) + X_train = X_train / 255.0 + X_val = X_val / 255.0 - return X_train, Y_train, X_test, Y_test + mean = np.mean(X_train) + std = np.std(X_train) + X_train = (X_train - mean) / (std + 1e-7) + X_val = (X_val - mean) / (std + 1e-7) + X_test = X_val[0:5000] + y_test = y_val[0:5000] + X_tuner = X_val[5000:] + y_tuner = y_val[5000:] + + return X_train, y_train, X_test, y_test, X_tuner, y_tuner - - def trainModel(self, model): + def trainModel(self, model, X_train, y_train, X_test, y_test): - #training parameters + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + batch_size = 128 - #maxepoches = 250 - maxepoches = 30 learning_rate = 0.01 - lr_decay = 1e-6 lr_drop = 20 - # The data, shuffled and split between train and test sets: - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train, y_train, x_test, y_test = self.data_preprocess() - - y_train = keras.utils.to_categorical(y_train, self.num_classes) - y_test = keras.utils.to_categorical(y_test, self.num_classes) + def lr_scheduler(epoch): return learning_rate * (0.5 ** (epoch // lr_drop)) + reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler) #data augmentation @@ -161,46 +146,46 @@ class VGG16_CIFAR10(Benchmark): horizontal_flip=True, # randomly flip images vertical_flip=False) # randomly flip images # (std, mean, and principal 
components if ZCA whitening is applied). - datagen.fit(x_train) + datagen.fit(X_train) - #optimization details - sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True) - model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy']) + model.compile( + loss='categorical_crossentropy', + optimizer=keras.optimizers.SGD(lr=learning_rate, decay=1e-6, momentum=0.9, nesterov=True), + metrics=['accuracy'] + ) - - # training process in a for loop with learning rate drop every 25 epoches. - - historytemp = model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - steps_per_epoch=x_train.shape[0] // batch_size, - epochs=maxepoches, - validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2) + # training process in a for loop with learning rate drop every 20 epoches. + + model.fit_generator( + datagen.flow(X_train, y_train, batch_size=batch_size), + steps_per_epoch=X_train.shape[0] // batch_size, + epochs=250, + validation_data=(X_test, y_test), + callbacks=[reduce_lr] + ) return model - - - - -if __name__ == "__main__": - +if __name__ == '__main__': - os.environ["CUDA_VISIBLE_DEVICES"] = "0" + os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Changing to NCHW format K.set_image_data_format('channels_first') ### Parameters specific to each benchmark - reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar10/" - keras_model_file = "vgg16_cifar10.h5" - hpvm_dir = "data/vgg16_cifar10/" + reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar10/' + keras_model_file = MODEL_PARAMS_DIR + '/vgg16_cifar10/weights.h5' + data_dir = '/vgg16_cifar10/' + src_dir = 'data/vgg16_cifar10_src/' num_classes = 10 + batch_size = 500 - vgg16_cifar10 = VGG16_CIFAR10("vgg16_cifar10", reload_dir, keras_model_file, hpvm_dir, num_classes) + model = VGG16_CIFAR10('VGG16_CIFAR10', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) - vgg16_cifar10.run(sys.argv) + model.run(sys.argv) 
diff --git a/hpvm/projects/keras/src/vgg16_cifar100.py b/hpvm/projects/keras/src/vgg16_cifar100.py index 41d0816ecd7c18a22448a78b9e0a4b1a573d40a7..03bb852e00bb61a7b17836f5c4df5bbf56c4b466 100644 --- a/hpvm/projects/keras/src/vgg16_cifar100.py +++ b/hpvm/projects/keras/src/vgg16_cifar100.py @@ -1,32 +1,34 @@ - -from __future__ import print_function import os import sys -import keras -from keras.datasets import cifar100 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras import optimizers +import glob + import numpy as np -from keras.layers.core import Lambda -from keras import backend as K +import tensorflow as tf +import scipy +import scipy.io +import keras +from keras.models import Model, Sequential +from keras.layers import * +from keras.optimizers import Adam from keras import regularizers +from keras import backend as K +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler + +from keras.datasets import cifar100 from Benchmark import Benchmark -from frontend.weight_utils import dumpCalibrationData -from frontend.approxhpvm_translator import translate_to_approxhpvm +from Config import MODEL_PARAMS_DIR class VGG16_CIFAR100(Benchmark): - def buildModel(self): # Build the network of vgg for 100 classes self.weight_decay = 0.0005 - self.x_shape = [3,32,32] + self.x_shape = [3, 32, 32] model = Sequential() weight_decay = self.weight_decay @@ -112,40 +114,35 @@ class VGG16_CIFAR100(Benchmark): return model - def data_preprocess(self): - (X_train, Y_train), (X_test, Y_test) = cifar100.load_data() + (X_train, y_train), (X_val, y_val) = cifar100.load_data() + + X_train = X_train / 255.0 + X_val = X_val / 255.0 - mean = np.mean(X_train,axis=(0,1,2,3)) - std = np.std(X_train,axis=(0,1,2,3)) - X_train = 
(X_train-mean)/(std+1e-7) - X_test = (X_test-mean)/(std+1e-7) + mean = np.mean(X_train) + std = np.std(X_train) + X_train = (X_train - mean) / (std + 1e-7) + X_val = (X_val - mean) / (std + 1e-7) - return X_train, Y_train, X_test, Y_test + X_test = X_val[0:5000] + y_test = y_val[0:5000] + X_tuner = X_val[5000:] + y_tuner = y_val[5000:] + return X_train, y_train, X_test, y_test, X_tuner, y_tuner - def trainModel(self,model): + def trainModel(self,model, X_train, y_train, X_test, y_test): - #training parameters + y_train = to_categorical(y_train, self.num_classes) + y_test = to_categorical(y_test, self.num_classes) + batch_size = 128 - #maxepoches = 250 - #maxepoches = 400 - maxepoches = 4 - learning_rate = 0.05 - lr_decay = 1e-6 - lr_drop = 20 - - # The data, shuffled and split between train and test sets: - (x_train, y_train), (x_test, y_test) = cifar100.load_data() - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train, x_test = self.normalize(x_train, x_test) - - y_train = keras.utils.to_categorical(y_train, self.num_classes) - y_test = keras.utils.to_categorical(y_test, self.num_classes) - + learning_rate = 0.1 + lr_drop = 30 + def lr_scheduler(epoch): return learning_rate * (0.5 ** (epoch // lr_drop)) @@ -165,46 +162,45 @@ class VGG16_CIFAR100(Benchmark): horizontal_flip=True, # randomly flip images vertical_flip=False) # randomly flip images # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - + datagen.fit(X_train) - #optimization details - sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True) - model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy']) + model.compile( + loss='categorical_crossentropy', + optimizer=optimizers.Adam(lr=learning_rate), + metrics=['accuracy'] + ) # training process in a for loop with learning rate drop every 25 epoches. 
- historytemp = model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - steps_per_epoch=x_train.shape[0] // batch_size, - epochs=maxepoches, - validation_data=(x_test, y_test),callbacks=[reduce_lr],verbose=2) + + model.fit_generator( + datagen.flow(X_train, y_train, batch_size=batch_size), + steps_per_epoch=X_train.shape[0] // batch_size, + epochs=250, + validation_data=(X_test, y_test), + callbacks=[reduce_lr] + ) - ##### model.save_weights('cifar100vgg.h5') return model - - - - -if __name__ == "__main__": +if __name__ == '__main__': - - os.environ["CUDA_VISIBLE_DEVICES"] = "0" + os.environ['CUDA_VISIBLE_DEVICES'] = '0' # Changing to NCHW format K.set_image_data_format('channels_first') ### Parameters specific to each benchmark - reload_dir = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/vgg16_cifar100/" - keras_model_file = "vgg16_cifar100.h5" - hpvm_dir = "data/vgg16_cifar100/" + reload_dir = MODEL_PARAMS_DIR + '/vgg16_cifar100/' + keras_model_file = MODEL_PARAMS_DIR + '/vgg16_cifar100/weights.h5' + data_dir = '/vgg16_cifar100/' + src_dir = 'data/vgg16_cifar100_src/' num_classes = 100 + batch_size = 100 - vgg16_cifar100 = VGG16_CIFAR100("vgg16_cifar100", reload_dir, keras_model_file, hpvm_dir, num_classes) + model = VGG16_CIFAR100('VGG16_CIFAR100', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) - vgg16_cifar100.run(sys.argv) - + model.run(sys.argv) diff --git a/hpvm/projects/keras/src/vgg16_imagenet.py b/hpvm/projects/keras/src/vgg16_imagenet.py index b41df8b83a966495d6a1c8281745e91181a66c41..35ab92479e545ba44cf2175cb7b8abcec84c4fed 100644 --- a/hpvm/projects/keras/src/vgg16_imagenet.py +++ b/hpvm/projects/keras/src/vgg16_imagenet.py @@ -1,233 +1,140 @@ import os +import sys import glob -import random -import scipy -import scipy.io -import cv2 import numpy as np - import tensorflow as tf +import scipy +import scipy.io import keras -from keras.models import Sequential, Model +from 
keras.models import Model, Sequential from keras.layers import * -from keras.utils import to_categorical -from keras.applications.vgg16 import VGG16, preprocess_input +from keras.optimizers import Adam +from keras import regularizers from keras import backend as K - -from frontend.approxhpvm_translator import translate_to_approxhpvm -from frontend.weight_utils import dumpCalibrationData2 - - -np.random.seed(2020) - -os.environ["CUDA_VISIBLE_DEVICES"] = "1" - -K.set_image_data_format('channels_first') - -data_format = 'channels_first' - - -IMAGENET_DIR = '/home/nz11/ILSVRC2012/' -OUTPUT_DIR = 'data/vgg16_imagenet_tune/' - -NUM_CLASSES = 200 -IMAGES_PER_CLASS = 50 -# VAL_SIZE = 100 - - - -def get_vgg16_nchw_keras(): - img_input = Input(shape=(3, 224, 224)) - - # Block 1 - x = Conv2D(64, (3, 3), - padding='same', - data_format=data_format)(img_input) - x = Activation('relu')(x) - x = Conv2D(64, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x) - - # Block 2 - x = Conv2D(128, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(128, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x) - - # Block 3 - x = Conv2D(256, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(256, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(256, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x) - - # Block 4 - x = Conv2D(512, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(512, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(512, (3, 3), - 
padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x) - - # Block 5 - x = Conv2D(512, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(512, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = Conv2D(512, (3, 3), - padding='same', - data_format=data_format)(x) - x = Activation('relu')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), data_format=data_format)(x) - - x = Flatten(data_format='channels_last')(x) - - x = Dense(4096)(x) - x = Activation('relu')(x) - x = Dense(4096)(x) - x = Activation('relu')(x) - x = Dense(1000)(x) - x = Activation('softmax')(x) - - model_nchw = Model(img_input, x) - - - model = VGG16() - - j = 0 - for i in range(len(model_nchw.layers)): - if 'padding' in model_nchw.layers[i].name or 'activation' in model_nchw.layers[i].name: - continue - try: - model_nchw.layers[i].set_weights(model.layers[j].get_weights()) - except: - print (i, model_nchw.layers[i], 'skipped') - j += 1 - - - return model_nchw - +from keras.utils import to_categorical +from keras.preprocessing.image import ImageDataGenerator +from keras.callbacks import LearningRateScheduler + +from Benchmark import Benchmark +from Config import MODEL_PARAMS_DIR + + + +class VGG16(Benchmark): + + def buildModel(self): + img_input = Input(shape=(3, 224, 224)) + + # Block 1 + x = ZeroPadding2D(padding=(1, 1))(img_input) + x = Conv2D(64, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(64, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D((2, 2), strides=(2, 2))(x) + + # Block 2 + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(128, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(128, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D((2, 2), 
strides=(2, 2))(x) + + # Block 3 + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(256, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(256, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(256, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D((2, 2), strides=(2, 2))(x) + + # Block 4 + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D((2, 2), strides=(2, 2))(x) + + # Block 5 + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = ZeroPadding2D(padding=(1, 1))(x) + x = Conv2D(512, (3, 3), padding='valid')(x) + x = Activation('relu')(x) + x = MaxPooling2D((2, 2), strides=(2, 2))(x) + + # x = Flatten(data_format='channels_first')(x) + x = Flatten()(x) + + x = Dense(4096)(x) + x = Activation('relu')(x) + x = Dropout(0.5)(x) + x = Dense(4096)(x) + x = Activation('relu')(x) + x = Dropout(0.5)(x) + x = Dense(1000)(x) + x = Activation('softmax')(x) + + model = Model(img_input, x) + + return model -def load_image(x): - - image = cv2.imread(x) + def data_preprocess(self): + X_train, y_train = None, None - height, width, _ = image.shape - new_height = height * 256 // min(image.shape[:2]) - new_width = width * 256 // min(image.shape[:2]) - image = cv2.resize(image, (new_width, new_height), interpolation=cv2.INTER_CUBIC) - - height, width, _ = image.shape - startx = width // 2 - (224 // 2) - starty = height // 2 - (224 // 2) - image = image[starty:starty + 224, 
startx:startx + 224] - - image = image[:, :, ::-1] - image = np.transpose(image, (2, 0, 1)) - image = preprocess_input(image.astype(np.float32), data_format=data_format) + X_test = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_input.bin', dtype=np.float32) + X_test = X_test.reshape((-1, 3, 224, 224)) + y_test = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/test_labels.bin', dtype=np.uint32) - return image.astype(np.float32) - - -meta = scipy.io.loadmat(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/meta.mat') -original_idx_to_synset = {} -synset_to_name = {} - -for i in range(1000): - ilsvrc2012_id = int(meta['synsets'][i,0][0][0][0]) - synset = meta['synsets'][i,0][1][0] - name = meta['synsets'][i,0][2][0] - original_idx_to_synset[ilsvrc2012_id] = synset - synset_to_name[synset] = name - -synset_to_keras_idx = {} -keras_idx_to_name = {} -f = open(IMAGENET_DIR + 'ILSVRC2012_devkit_t12/data/synset_words.txt', 'r') -c = 0 -for line in f: - parts = line.split(' ') - synset_to_keras_idx[parts[0]] = c - keras_idx_to_name[c] = ' '.join(parts[1:]) - c += 1 -f.close() - - - - -model = get_vgg16_nchw_keras() - -X_tune, X_test = [], [] -y_tune, y_true = [], [] - -classes = glob.glob(IMAGENET_DIR + 'val/*') - -for c in np.random.permutation(len(classes))[:NUM_CLASSES]: - x = glob.glob(classes[c] + '/*') - x = np.array(x) - - idx = np.random.permutation(len(x)) - idx = idx[:max(len(idx), IMAGES_PER_CLASS)] + X_tuner = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/tune_input.bin', dtype=np.float32) + X_tuner = X_tuner.reshape((-1, 3, 224, 224)) + y_tuner = np.fromfile(MODEL_PARAMS_DIR + '/vgg16_imagenet/tune_labels.bin', dtype=np.uint32) + + return X_train, y_train, X_test, y_test, X_tuner, y_tuner - synset = classes[c].split('/')[-1] - images = list(map(lambda x : load_image(x), x[idx])) - labels = [synset_to_keras_idx[synset]] * len(x[idx]) - X_test += images[:IMAGES_PER_CLASS // 2] - y_true += labels[:IMAGES_PER_CLASS // 2] - - X_tune += images[IMAGES_PER_CLASS // 2:] 
- y_tune += labels[IMAGES_PER_CLASS // 2:] - - -X_test = np.array(X_test) -y_true = np.array(y_true) -X_tune = np.array(X_tune) -y_tune = np.array(y_tune) + def trainModel(self, model): -print ('tune size', len(X_tune)) -print ('test size', len(X_test)) + assert False, "ImageNet training not supported - use Pretrained weights" + +if __name__ == '__main__': -translate_to_approxhpvm(model, OUTPUT_DIR, X_tune, y_tune, 1000) - -# # dumpCalibrationData2(OUTPUT_DIR + 'test_input_10K.bin', X_test, OUTPUT_DIR + 'test_labels_10K.bin', y_true) -# dumpCalibrationData2(OUTPUT_DIR + 'tune_input.bin', X_tune, OUTPUT_DIR + 'tune_labels.bin', y_tune) -# dumpCalibrationData2(OUTPUT_DIR + 'test_input.bin', X_test, OUTPUT_DIR + 'test_labels.bin', y_true) + os.environ['CUDA_VISIBLE_DEVICES'] = '0' + # Changing to NCHW format + K.set_image_data_format('channels_first') + ### Parameters specific to each benchmark + reload_dir = MODEL_PARAMS_DIR + '/vgg16_imagenet/' + keras_model_file = MODEL_PARAMS_DIR + '/vgg16_imagenet/weights.h5' + data_dir = '/vgg16_imagenet/' + src_dir = 'data/vgg16_imagenet_src/' + num_classes = 1000 + batch_size = 50 -pred = np.argmax(model.predict(X_test), axis=1) -print ('val accuracy', np.sum(pred == y_true.ravel()) / len(X_test)) + alexnet = VGG16('VGG16_imagenet', reload_dir, keras_model_file, data_dir, src_dir, num_classes, batch_size) -# pred = np.argmax(model.predict(X_tune), axis=1) -# print ('val accuracy', np.sum(pred == y_tune.ravel()) / len(X_tune)) + alexnet.run(sys.argv) + + \ No newline at end of file diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp index 41f4334c420f77e1a62829798e06bca2899b8e80..dd689d202a91755ecad116a3d1277f59c740d0b1 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet2_cifar10/alexnet2_cifar10.cpp @@ 
-415,7 +415,7 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet2_cifar10/"; - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3); @@ -497,7 +497,7 @@ int main() { int test_input_size = 5000; int batch_count = test_input_size / batch_size; - std::string input_path = dir_prefix + std::string("tune_input.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); // void* input = create4DTensor(0,nchw,batch_size,3,32,32); startMemTracking(); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp index e5edc8a5890cdbf51bba1ed0effdf64b2297d29a..ae5f31b7dcca3ec59920e0dcc0ba34ca5ea28cbc 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_cifar10/alexnet_cifar10.cpp @@ -366,9 +366,9 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_cifar10/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); uint8_t *labels = readLabels(labels_path.c_str(), 5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp index 
87aff828551b2c778098379728b79ca2cb440918..d49c0d2d06b1ea04ad78ee72dc2776bd000dacfd 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/alexnet_imagenet/alexnet_imagenet.cpp @@ -465,8 +465,8 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/alexnet_imagenet/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 11, 11); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt index b4e51dff426f4d3c5cb7b9572e6aa5940212acbd..32a9642d38ab816246b9e5cca01c6efcec3a2d8d 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/data/tuner_confs.txt @@ -1,8 +1,8 @@ 282.5141369999999 +++++ conf1 1 1 98.7 0.0 -1 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 -2 gpu conv fp32 11 add fp32 1 pool_max fp32 1 tanh fp32 1 +1 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 +2 gpu conv fp32 11 add fp32 1 tanh fp32 1 pool_max fp32 1 3 gpu mul fp32 11 add fp32 1 tanh fp32 1 4 gpu mul fp32 11 add fp32 1 tanh fp32 1 5 gpu softmax fp32 1 diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp index a20315cb9c36610aac2d0d43059182302674b83b..b67d585d01b4809d4107d95ab4476e741f13dd7c 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp +++ 
b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/lenet_mnist/lenet_mnist.cpp @@ -268,8 +268,8 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/lenet_mnist/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp index 5ea5c298bf7b5858af024aff7a4ee81c4b8a6ed2..a4de2826216d9bf6b3843e466097abae35ca8b72 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/mobilenet_cifar10/mobilenet_cifar10.cpp @@ -1968,8 +1968,8 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/mobilenet_cifar10/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 32, 3, 3, 3); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp index c6fa02c784f90f8c03a81991763e533d864b9ed0..66ab37cd33e502df35f73ca2b3addb1c4be53808 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp +++ 
b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet18_cifar10/resnet18_cifar10.cpp @@ -1303,9 +1303,9 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet18_cifar10/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); // void* input = readTrainedWeights(input_path.c_str(), 0,5000,3,32,32); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); // uint32_t* labels = readLabels3(labels_path.c_str(),5000); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp index b41e0bc96df83a91f5656e7094e914e8d86e6df5..db6b64daa0d214017ebcf968067fe44f40aa9c06 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/resnet50_imagenet/resnet50_imagenet.cpp @@ -5136,8 +5136,8 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/resnet50_imagenet/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 7, 7); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp index 
13d150e7a946296e8ce5c7fb9e128a91dedbe534..39c2ffc8769c8b8f13b359e56f4e138dff0fed98 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar10/vgg16_cifar10.cpp @@ -830,8 +830,8 @@ typedef struct __attribute__((__packed__)) { int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar10/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp index 6e26f89b755db90853ce90180ab179b6df421827..ce899cd0a24776bd5a7c8b51f13e0dac698b3495 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_cifar100/vgg16_cifar100.cpp @@ -831,8 +831,8 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_cifar100/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); @@ -998,7 +998,7 @@ int main() { startMemTracking(); startProfiling(); - for (int j = 0; j < 14; j++) { + for (int j = 0; j < 1; j++) { for (int i = 0; i < batch_count; 
i++) { int start = i * batch_size; diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp index 4fad931efc4988cebdf317dc0761c9146cebab0f..91af01fe8eb7deacb47cc42f3fe6cbb620adc000 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/vgg16_imagenet/vgg16_imagenet.cpp @@ -879,8 +879,8 @@ int main() { std::string dir_prefix = std::string(MODEL_PARAMS_DIR) + "/vgg16_imagenet/"; - std::string input_path = dir_prefix + std::string("tune_input.bin"); - std::string labels_path = dir_prefix + std::string("tune_labels.bin"); + std::string input_path = dir_prefix + std::string("test_input.bin"); + std::string labels_path = dir_prefix + std::string("test_labels.bin"); std::string conv2d_1_w_path = dir_prefix + std::string("conv2d_1_w.bin"); void *conv2d_1_w = readTrainedWeights(conv2d_1_w_path.c_str(), 0, 64, 3, 3, 3); diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h index a627f83e6b2aa9f38b09d82ee94ce35da1a6bafe..71e1c268726e1fb77b0713599928262b95bd64f5 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h +++ b/hpvm/test/dnn_benchmarks/hpvm-c/include/tensorUtils.h @@ -318,13 +318,13 @@ struct Tensor* readInputBatch(const char* file_name, int data_type, int start, int end, int dim2_size, int dim3_size, int dim4_size){ - int dim1_size = end - start; + long int dim1_size = end - start; // FIXIT: Don't assume floating point types - int type_size = 4; // NOTE: Assuming floating point tensors + long int type_size = 4; // NOTE: Assuming floating point tensors long int num_elems = dim1_size * dim2_size * dim3_size * dim4_size; long int size_in_bytes = type_size * dim1_size * dim2_size * dim3_size * dim4_size; float* tensor_data = (float*) malloc(sizeof(float) * num_elems); - int file_header_size = 
type_size * start * dim2_size * dim3_size * dim4_size; + long int file_header_size = type_size * start * dim2_size * dim3_size * dim4_size; FILE* file = fopen(file_name, "rb"); if(file == NULL){ @@ -335,9 +335,9 @@ struct Tensor* readInputBatch(const char* file_name, int data_type, fseek(file, file_header_size, SEEK_SET); // Skipping the file header size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); - fclose(file); - + + //printf ("FIXED input BATCH read \n"); struct Tensor* weights = (struct Tensor*) create4DTensor(data_type, nchw, dim1_size, dim2_size, dim3_size, dim4_size);