diff --git a/llvm/projects/keras/README.md b/llvm/projects/keras/README.md new file mode 100644 index 0000000000000000000000000000000000000000..1f1133adb0a6266a2cccb4e06d13e537756e8847 --- /dev/null +++ b/llvm/projects/keras/README.md @@ -0,0 +1,17 @@ + + +## Exporting Conda Environment: + +conda activate ${ENV_TO_EXPORT} +conda env export > environment.yml + +## Importing Conda Environment: + +conda create -f environment.yml + +## Building and Installing Frontend: + +python setup.py build +python setup.py install + + diff --git a/llvm/projects/keras/frontend/__init__.py b/llvm/projects/keras/frontend/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/llvm/projects/keras/frontend/approxhpvm_translator.py b/llvm/projects/keras/frontend/approxhpvm_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..2cb8c5d74143e7c7ed6e9f536cb90f8ee8a468ad --- /dev/null +++ b/llvm/projects/keras/frontend/approxhpvm_translator.py @@ -0,0 +1,878 @@ + +import sys +import numpy as np +from frontend.promise_translator import PromiseRtTranslator +from frontend.hpvm_dfg_translator import HPVMTranslator +from frontend.weight_utils import dumpLabels, dumpData, dumpConvWeights, dumpFcWeights, dumpFcBias +import keras +import os + + + +class DFG: + + root_set = False; + + def __init__(self): + self.node_map = {} + self.root_node = None + self.last_node = None + + + def hasSingleInput(self, layer): + layer_name = layer.__class__.__name__ + + singleInLayers = {} + singleInLayers["Conv2D"] = True + singleInLayers["Dense"] = True + singleInLayers["MaxPooling2D"] = True + singleInLayers["Activation"] = True + singleInLayers["BatchNormalization"] = True + singleInLayers["Flatten"] = True + + if layer_name in singleInLayers: + return True + + return False + + + def hasMultipleInputs(self, layer): + layer_name = layer.__class__.__name__ + + multipleInLayers = {} + multipleInLayers["Add"] = True + + if layer_name in multipleInLayers: + return True + + return False + + + + def add_dfg_edge(self, inbound_node_name, dfg_node): + + inbound_node_name = inbound_node_name.split(":")[0] + inbound_node_name = inbound_node_name.split("/")[0] + if inbound_node_name in self.node_map: + inbound_node = self.node_map[inbound_node_name] + print (inbound_node_name, " found!") + inbound_node.add_output(dfg_node) + dfg_node.add_input(inbound_node) + + else: + print ("--inbound node NOT FOUND!") + + + + + def add_to_graph(self, layer): + dfg_node = DFGNode(layer) + if not self.root_set: + self.root_node = dfg_node + self.root_set = True # DFG root node is now set + + if self.hasMultipleInputs(layer): + for j in range(len(layer.input)): + print(type(layer.input[j])) + print(layer.input[j].op.name) + self.add_dfg_edge(layer.input[j].op.name, dfg_node) + + else: + #if self.hasSingleInput(layer): + print (layer.input.name) + self.add_dfg_edge(layer.input.name, dfg_node) + + # Adding DFG node to name mapping + self.node_map[layer.name] = dfg_node + + + # Check if all predecessor nodes have been visited thus far - reverse postorder traversal + def predVisited(self, cur_node, visited_nodes): + for input_node in cur_node.inputs: + if input_node.layer_name not in visited_nodes: + return False; + + # All predecessors are visited + return True + + + def traverseNode(self, cur_node, visited_nodes): + + # Skip visited nodes + if cur_node.layer_name in visited_nodes: + return + + if self.predVisited(cur_node, visited_nodes): + 
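+        # All predecessors have been visited, so this node can be emitted now;
+        # visiting a node only after all of its inputs yields the reverse
+        # postorder walk over the DFG used throughout this translator.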
print(cur_node.layer_type) + print(cur_node.layer_name) + visited_nodes[cur_node.layer_name] = True + + # Invoking traversal on outbound nodes + for output_node in cur_node.outputs: + self.traverseNode(output_node, visited_nodes) + + # NOTE: Assuming that no outbound edges implies the last node in the graph + if len(cur_node.outputs) == 0: + self.last_node = cur_node + + + # Print the DFG in reverse postorder + def printDFG(self): + + print ("\n\n ****** Printing DFG ******* \n\n") + visited_nodes = {} + # Starting traversal at the DFG root node + self.traverseNode(self.root_node, visited_nodes) + + + + +class DFGNode: + + def add_output(self, output_node): + self.outputs.append(output_node) + + def add_input(self, input_node): + self.inputs.append(input_node) + + + def __init__(self, layer): + + self.inputs = [] + self.outputs = [] + + layer_type = layer.__class__.__name__ + self.layer_type = layer_type # layer type e.g., conv2d, add, dense + self.layer_name = layer.name # unique layer identifier + print (self.layer_name) + + if layer_type == "Conv2D" or layer_type == "Dense": + self.weights = layer.get_weights()[0] + print("\t", self.weights.shape) + self.use_bias = layer.use_bias + + if layer.use_bias: + self.use_bias = layer.use_bias + self.bias_weights = layer.get_weights()[1] + print("\t", self.bias_weights.shape) + + if layer_type == "Conv2D": + self.padding = layer.padding + self.strides = layer.strides + print("\t", self.strides) + print("\tPadding = ", self.padding) + + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": + self.pool_size = layer.pool_size + self.strides = layer.strides + print("\t pool_size = ", self.pool_size) + print("\t strides = ", self.strides) + + if layer_type == "Conv2D" or layer_type == "Dense" or layer_type == "Activation": + self.activation_type = layer.activation.__name__ + print ("\t Activation = ", self.activation_type) + + + if layer_type == "ZeroPadding2D": + print ("***ZeroPaddding \n"); + self.padding = layer.padding + print ("padding = ", self.padding); + + + +class TensorRtTranslator: + + def __init__(self, dfg): + self.dfg = dfg + self.output_map = {} + self.counter = 0 + self.weight_str = "" + self.program_str = "" + self.input_str = "" + self.filter_names = {} + + + def getWeightStr(self): + return self.weight_str + + + def getInputStr(self): + return self.input_str + + + def getFilterNames(self): + return self.filter_names + + + def getWeightVarName(self, weights): + + output_var_name = "weights_" + str(self.w_counter) + self.w_counter += 1 + self.filter_names[weights] = output_var_name + + return output_var_name + + + def getVariableName(self, cur_node): + + output_var_name = "var_" + str(self.counter) + self.counter += 1 + self.output_map[cur_node.layer_name] = output_var_name + + return output_var_name + + + def isSkipLayer(self, layer_type): + + skip_layers = {} + skip_layers["Flatten"] = 0 + skip_layers["Dropout"] = 0 + skip_layers["ZeroPadding2D"] = 0 + + if layer_type in skip_layers: + return True + else: + return False + + + # NOTE: returns the previous DFG node ignoring "Flatten", "Dropout" Layers + def getPrevActiveLayer(self, cur_node): + + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node.inputs[0]) + return cur_node + else: + return cur_node + + + + def getSingleInputName(self, cur_node): + + 
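+    # Resolve the generated variable name that feeds cur_node, skipping
+    # pass-through layers (Flatten, Dropout, ZeroPadding2D) and returning
+    # "input" when the producer is the Keras InputLayer (or there is no input).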
print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return "input" + + print ("Input_type = ", cur_node.inputs[0].layer_type) + + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node) + + if cur_node.inputs[0].layer_type == "InputLayer": + return "input" + + # get input to the layer + input_node_name = cur_node.inputs[0].layer_name # get the input layer ID + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return input_var_name + + + + def getPrevLayerPadding(self, cur_node): + + print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return None + + print ("Input_type = ", cur_node.inputs[0].layer_type) + if cur_node.inputs[0].layer_type == "ZeroPadding2D": + pred_padding = cur_node.inputs[0].padding + return pred_padding + + return None + + + + def getMultipleInputNames(self, cur_node): + + var_names = [] + for i in range(len(cur_node.inputs)): + # get input to the layer + input_node_name = cur_node.inputs[i].layer_name # get the input layer ID + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + var_names.append(input_var_name) + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return var_names + + + + def hasBiasAdd(self, cur_node): + + if cur_node.layer_type == "Conv2D" or cur_node.layer_type == "Dense": + return cur_node.use_bias + + return False + + + def hasActivation(self, cur_node): + + if cur_node.layer_type == "Conv2D" or cur_node.layer_type == "Dense": + return cur_node.activation_type != "linear" + + return False + + + def genActivationCall(self, input_var, output_var, activation_type): + + func_name = "" + if activation_type == "tanh": + func_name = "Tanh" + + if activation_type == "relu": + func_name = "Relu" + + if activation_type == "softmax": + func_name = "Softmax" + + inst_str = "void* " + output_var + " = " + inst_str += "tensor" + func_name + "(" + input_var + "); \n" + + return inst_str + + + + def genNodeCalls(self, cur_node): + + out_var_name1 = self.getVariableName(cur_node) + layer_type = cur_node.layer_type + + if layer_type == "Conv2D": + input_var_name = self.getSingleInputName(cur_node) + weights = cur_node.weights + strides = cur_node.strides + + padding = 0 + if cur_node.padding.strip() == "valid": + padding = 0 + else: + padding = cur_node.padding + padding = int((weights.shape[0] - 1) / 2) + + prev_padding = self.getPrevLayerPadding(cur_node) + if prev_padding != None: + # FIXME: currently only supporting symmetric padding + padding = prev_padding[0][0] + + inst_str = "void* " + out_var_name1 + " = " + inst_str += "tensorConvolution(" + input_var_name + ", " + inst_str += cur_node.layer_name + "_w, " + inst_str += str(padding) + ", " + inst_str += str(padding) + ", " + inst_str += str(strides[0]) + ", " + inst_str += str(strides[1]) + ", " + inst_str += "1, 0); \n" + + self.program_str += inst_str + + + if layer_type == "Dense": + input_var_name = self.getSingleInputName(cur_node) + + weights = cur_node.weights + inst_str = "void* " + out_var_name1 + " = " + inst_str 
+= "tensorGemmGPU(" + input_var_name + ", " + inst_str += cur_node.layer_name + "_w" + inst_str += "); \n" + + self.program_str += inst_str + + + if self.hasBiasAdd(cur_node): + out_var_name2 = self.getVariableName(cur_node) + + inst_str = "void* " + out_var_name2 + " = " + inst_str += "tensorAdd(" + out_var_name1 + ", " + inst_str += cur_node.layer_name + "_b" + inst_str += "); \n" + + self.program_str += inst_str + + # NOTE: Changing output variable + out_var_name1 = out_var_name2 + + + if self.hasActivation(cur_node): + activation_type = cur_node.activation_type + out_var_name3 = self.getVariableName(cur_node) + + inst_str = self.genActivationCall(out_var_name1, out_var_name3, activation_type) + self.program_str += inst_str + + + if layer_type == "Activation": + input_var_name = self.getSingleInputName(cur_node) + + inst_str = self.genActivationCall(input_var_name, out_var_name1, cur_node.activation_type) + self.program_str += inst_str + + + if layer_type == "BatchNormalization": + input_var_name = self.getSingleInputName(cur_node) + + inst_str = "void* " + out_var_name1 + " = " + inst_str += "tensorBatchNormalization(" + input_var_name + "); \n" + self.program_str += inst_str + + + if layer_type == "Add": + input_vars = self.getMultipleInputNames(cur_node) + + inst_str = "void* " + out_var_name1 + " = " + inst_str += "tensorAdd(" + input_vars[0] + ", " + input_vars[1] + "); \n" + self.program_str += inst_str + + + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": + input_var_name = self.getSingleInputName(cur_node) + + pool_size = cur_node.pool_size + strides = cur_node.strides + # FIXME: Non-same padding is *NOT* currently supported + padding = 0 + pool_type = 0 + if layer_type == "MaxPooling2D": + pool_type = "0" + if layer_type == "AveragePooling2D": + pool_type = "1" + + # tensorPooling(input, pool_type, pool_h, pool_w, v_pad, h_pad, v_stride, h_stride) + inst_str = "void* " + out_var_name1 + " = " + inst_str += "tensorPooling(" + input_var_name + "," + pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1]) + inst_str += "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1]) + inst_str += "); \n" + self.program_str += inst_str + + + + + + def codegenNode(self, dfg, cur_node, visited_nodes): + + # Skip visited nodes + if cur_node.layer_name in visited_nodes: + return + + if dfg.predVisited(cur_node, visited_nodes): + + visited_nodes[cur_node.layer_name] = True + + self.genNodeCalls(cur_node) + + # Invoking traversal on outbound nodes + for output_node in cur_node.outputs: + self.codegenNode(dfg, output_node, visited_nodes) + + + # Print the DFG in reverse postorder + def codegen(self, dfg): + + print ("\n\n ****** Codegen for HPVM Tensor Rt ******* \n\n") + visited_nodes = {} + # Starting traversal at the DFG root node + self.codegenNode(dfg, dfg.root_node, visited_nodes) + + + + + def dump_weights(self, model, prefix): + + layer_count = 0 + for i in range(len(model.layers)): + layer = model.layers[i] + layer_type = layer.__class__.__name__ + layer_name = layer.name + + if layer_type == "Conv2D": + weights = layer.get_weights()[0] + w_name = layer_name + "_w" + + self.filter_names[w_name] = 1 + print (weights.shape, w_name) + + N = weights.shape[3] + C = weights.shape[2] + H = weights.shape[1] + W = weights.shape[0] + + #unique_file_name = "conv" + str(layer_count) + ".bin" + unique_file_name = w_name + ".bin" + dumpConvWeights(prefix + unique_file_name, weights, N, C, H, W) + + file_path = w_name + "_path" + file_path_str = 
"std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += unique_file_name + "\"); \n" + self.weight_str += file_path_str + + # FIXME: Be flexible for datatypes (currently only FP32 weights) + # NOTE: '0' specified for floating point type + self.weight_str += "void* " + w_name + " = " + " readTrainedWeights(" + self.weight_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + "," + str(H) + "," + str(W) + self.weight_str += "); \n" + + + if layer.use_bias: + bias_weights = layer.get_weights()[1] + b_name = layer_name + "_b" + + self.filter_names[b_name] = 1 + print (bias_weights.shape, b_name) + + #unique_file_name = "conv_bias" + str(layer_count) + ".bin" + unique_file_name = b_name + ".bin" + dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0]) + + file_path = b_name + "_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += unique_file_name + "\"); \n" + self.weight_str += file_path_str + + C = bias_weights.shape[0] + + self.weight_str += "void* " + b_name + " = " + " readTrainedWeights(" + self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n" + + + if layer_type == "Dense": + weights = layer.get_weights()[0] + w_name = layer_name + "_w" + + self.filter_names[w_name] = 1 + print (weights.shape, w_name) + + H = weights.shape[0] + W = weights.shape[1] + + #unique_file_name = "fc" + str(layer_count) + ".bin" + unique_file_name = w_name + ".bin" + dumpFcWeights(prefix + unique_file_name, weights, H, W) + + file_path = w_name + "_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += unique_file_name + "\"); \n" + self.weight_str += file_path_str + + self.weight_str += "void* " + w_name + " = " + " readTrainedWeights(" + self.weight_str += file_path + ".c_str(), 0,1,1," + str(H) + "," + str(W) + "); \n" + + + if layer.use_bias: + bias_weights = layer.get_weights()[1] + b_name = layer_name + "_b" + + self.filter_names[b_name] = 1 + print (bias_weights.shape, b_name) + + unique_file_name = b_name + ".bin" + dumpFcBias(prefix + unique_file_name, bias_weights, bias_weights.shape[0]) + + file_path = b_name + "_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += unique_file_name + "\"); \n" + self.weight_str += file_path_str + + C = bias_weights.shape[0] + + self.weight_str += "void* " + b_name + " = " + " readTrainedWeights(" + self.weight_str += file_path + ".c_str(), 0,1," + str(C) + ",1,1); \n" + + layer_count += 1 + + + + def add_header(self): + + headers = "\n#include <stdio.h> \n" + headers += "#include <stdlib.h> \n" + headers += "#include <unistd.h> \n" + headers += "#include <fcntl.h> \n" + headers += "#include <sys/types.h> \n" + headers += "#include <sys/stat.h> \n" + headers += "#include <string.h> \n" + + headers += "#include \"../../tensor_runtime/include/tensor_runtime.h\" \n" + headers += "#include \"../include/utils.h\" \n\n" + + main_func = "int main(){ \n\n" + + initialization = "llvm_hpvm_initTensorRt(0); \n\n" + + self.program_str += headers + self.program_str += main_func + self.program_str += initialization + + + + def add_footer(self, test_data): + + if test_data is not None and self.dfg.last_node is not None: + last_node = self.dfg.last_node + output_var = self.output_map[last_node.layer_name] + #accuracy_call = "\ncomputeAccuracy2(labels," + str(len(test_data)) + "," + output_var + "); \n" + #self.program_str += accuracy_call + + 
+ destructors = "\nllvm_hpvm_cleanupTensorRt(); \n" + self.program_str += destructors + + end_main = "\nreturn 0; \n\n}\n" + self.program_str += end_main + + return 0 + + + + def genInputCalls(self, test_data, test_labels, weights_dir): + + dumpData(weights_dir + "input.bin", test_data) + + N = test_data.shape[0] + C = test_data.shape[1] + H = test_data.shape[2] + W = test_data.shape[3] + + file_path = "input_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += "input.bin\"); \n" + self.weight_str += file_path_str + + self.input_str += "void* input = readTrainedWeights(" + self.input_str += file_path + ".c_str(), 0," + str(N) + "," + str(C) + "," + self.input_str += str(H) + "," + str(W) + "); \n" + + #self.weight_str += self.input_str + + # Adding input to the filter map + self.filter_names["input"] = 1 + + dumpLabels(weights_dir + "labels.bin", test_labels) + + file_path = "labels_path" + file_path_str = "std::string " + file_path + " = " + " dir_prefix + std::string(\"" + file_path_str += "labels.bin\"); \n" + self.weight_str += file_path_str + + self.input_str += "uint8_t* labels = readLabels(" + self.input_str += file_path + ".c_str()," + str(test_labels.shape[0]) + "); \n" + + + + def genBatchLoop(self, x_test): + + N = x_test.shape[0] + C = x_test.shape[1] + H = x_test.shape[2] + W = x_test.shape[3] + + loop_str = "" + loop_str += "\nstartMemTracking(); \n\n" + + loop_str += "int test_input_size = " + str(N) + "; \n" + loop_str += "int batch_size = " + str(N) + "; \n" + loop_str += "int batch_count = test_input_size / batch_size; \n" + loop_str += "float final_accuracy = 0.0; \n\n" + + loop_str += "for(int i = 0; i < batch_count; i++){ \n\n" + loop_str += "int start = i * batch_size; \n" + loop_str += "int end = (i + 1) * batch_size; \n" + + loop_str += "\nvoid* input = readInputBatch(input_path.c_str(),0,start,end," + loop_str += str(C) + "," + str(H) + "," + str(W) + "); \n\n" + + self.program_str += loop_str + + + + def endBatchLoop(self): + + end_loop_str = "" + end_loop_str += "\nuint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); \n" + + last_node = self.dfg.last_node + output_var = self.output_map[last_node.layer_name] + accuracy_call = "\nfloat accuracy = computeAccuracy2(labels, batch_size, " + output_var + "); \n" + end_loop_str += accuracy_call + + #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); " + end_loop_str += "final_accuracy += accuracy; \n" + end_loop_str += "freeBatchMemory(); \n " + end_loop_str += "\n}\n\n" + + end_loop_str += "final_accuracy = final_accuracy / batch_count; \n" + end_loop_str += "dumpFinalAccuracy(final_accuracy); \n\n" + + self.program_str += end_loop_str + + + + + def generateSourceProgram(self, dir_prefix): + + f = open(dir_prefix + "/src.cc", "w+") + f.write(self.program_str) + f.close() + + + + def translate(self, model, weights_dir, test_data, test_labels): + + self.add_header() + + dir_path = "std::string dir_prefix = std::string(\"" + weights_dir + "\"); \n" + self.weight_str += dir_path + + if test_data is not None: + self.genInputCalls(test_data, test_labels, weights_dir) + + self.dump_weights(model, weights_dir) + self.program_str += "\n" + self.weight_str + "\n\n" + + self.genBatchLoop(test_data) + + self.codegen(self.dfg) + + self.endBatchLoop() + + self.add_footer(test_data); + + self.generateSourceProgram(weights_dir) + + + + + +def reloadModelParams(model, reload_dir, x_test, y_test): + + print ("\n\n*****NOTE: Reloading 
pre-trained weights \n") + + score = model.evaluate(x_test, y_test, verbose=0) + print('Test loss2:', score[0]) + print('Test accuracy2:', score[1]) + + for i in range(len(model.layers)): + layer = model.layers[i] + layer_name = layer.name + print ("*layer_name = ", layer_name) + + if "conv" not in layer_name and "dense" not in layer_name: + continue + + w_path = reload_dir + layer_name + "_w.bin" + b_path = reload_dir + layer_name + "_b.bin" + + print ("** w_path = ", w_path) + print ("** b_path = ", b_path) + + w_arr = np.fromfile(w_path, dtype='float32') + b_arr = np.fromfile(b_path, dtype='float32') + + w_shape = layer.get_weights()[0].shape + b_shape = layer.get_weights()[1].shape + + if "conv" in layer_name: + w_nchw_shape = (w_shape[3], w_shape[2], w_shape[0], w_shape[1]) + w_arr = np.reshape(w_arr, w_nchw_shape) + b_arr = np.reshape(b_arr, b_shape) + + w_arr = np.transpose(w_arr, (2,3,1,0)) + print ("old_shape = ", w_shape, " new_shape = ", w_arr.shape) + + if "dense" in layer_name: + w_arr = np.reshape(w_arr, w_shape) + b_arr = np.reshape(b_arr, b_shape) + + weights = [] + weights.append(w_arr) + weights.append(b_arr) + # NOTE: overriding weights + layer.set_weights(weights) + + score = model.evaluate(x_test, y_test, verbose=0) + print('Test loss2:', score[0]) + print('Test accuracy2:', score[1]) + + +def getUniquePath(weights_dir): + + # Do not overwrite existing directories - create new with unique ID + if os.path.exists(weights_dir): + char_count = len(weights_dir) + if weights_dir[char_count - 1] == "/": + weights_dir = weights_dir[:char_count-1] + + tokens = weights_dir.split("_") + last_tok = tokens[len(tokens) - 1] + if last_tok.isdigit(): + id = int(last_tok) + id += 1 + weights_dir = "_".join(tokens[:-1]) + "_" + str(id) + "/" + else: + weights_dir = "_".join(tokens) + "_1/" + + weights_dir = getUniquePath(weights_dir) + + #print (weights_dir) + + return weights_dir + + +#***** Top level External Function ******* +def translate_to_approxhpvm(model, weights_dir, test_data=None, test_labels=None, + num_classes=10, reload_dir=None): + + + weights_dir = getUniquePath(weights_dir) + os.mkdir(weights_dir) + + + if reload_dir is not None: + y_test = keras.utils.to_categorical(test_labels, num_classes) + reloadModelParams(model, reload_dir, test_data, y_test) + + + dfg = DFG() + for i in range(len(model.layers)): + layer = model.layers[i] + # NOTE: Add DNN layer to graph + dfg.add_to_graph(layer) + + # Print DFG in reverse postorder + dfg.printDFG() + + + print ("test_data.shape = ", test_data.shape, "\n") + print ("test_labels.shape = ", test_labels.shape, "\n") + + tensorRtTranslator = TensorRtTranslator(dfg) + tensorRtTranslator.translate(model, weights_dir, test_data, test_labels) + weight_str = tensorRtTranslator.getWeightStr() + input_str = tensorRtTranslator.getInputStr() + + + promiseRtTranslator = PromiseRtTranslator(dfg, weight_str) + promiseRtTranslator.translate(model, weights_dir, test_data) + + filter_names = tensorRtTranslator.getFilterNames() + hpvmTranslator = HPVMTranslator(dfg, weight_str, input_str, filter_names) + hpvmTranslator.translate(model, weights_dir, test_data) + + diff --git a/llvm/projects/keras/frontend/hpvm_dfg_translator.py b/llvm/projects/keras/frontend/hpvm_dfg_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..8f33a4c5e0b96c873827d3d53baa842b97477170 --- /dev/null +++ b/llvm/projects/keras/frontend/hpvm_dfg_translator.py @@ -0,0 +1,568 @@ + +import sys + + +class HPVMTranslator: + + def __init__(self, dfg, 
weight_str, input_str, filter_names): + self.dfg = dfg + self.output_map = {} + self.counter = 0 + self.weight_str = weight_str + self.input_str = input_str + self.filter_names = filter_names + self.node_str = "" + self.root_str = "" + self.root_struct_str = "" + self.main_func_str = "" + self.file_header_str = "" + self.hpvm_node_names = {} + + + + + + def getVariableName(self, cur_node): + output_var_name = "var_" + str(self.counter) + self.counter += 1 + self.output_map[cur_node.layer_name] = output_var_name + self.hpvm_node_names[output_var_name] = 1 + + return output_var_name + + + + def isSkipLayer(self, layer_type): + skip_layers = {} + skip_layers["Flatten"] = 0 + skip_layers["Dropout"] = 0 + skip_layers["ZeroPadding2D"] = 0 + + if layer_type in skip_layers: + return True + else: + return False + + + + # NOTE: returns the previous DFG node ignoring "Flatten", "Dropout" Layers + def getPrevActiveLayer(self, cur_node): + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node.inputs[0]) + return cur_node + else: + return cur_node + + + def getSingleInputName(self, cur_node): + print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return "input" + + print ("Input_type = ", cur_node.inputs[0].layer_type) + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node) + + if cur_node.inputs[0].layer_type == "InputLayer": + return "input" + + # get input to the layer + input_node_name = cur_node.inputs[0].layer_name # get the input layer ID + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return input_var_name + + + + def getPrevLayerPadding(self, cur_node): + print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return None + + print ("Input_type = ", cur_node.inputs[0].layer_type) + if cur_node.inputs[0].layer_type == "ZeroPadding2D": + pred_padding = cur_node.inputs[0].padding + return pred_padding + + return None + + + + def getMultipleInputNames(self, cur_node): + var_names = [] + for i in range(len(cur_node.inputs)): + # get input to the layer + input_node_name = cur_node.inputs[i].layer_name # get the input layer ID + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + var_names.append(input_var_name) + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return var_names + + + + def hasBiasAdd(self, cur_node): + if cur_node.layer_type == "Conv2D" or cur_node.layer_type == "Dense": + return cur_node.use_bias + + return False + + + def hasActivation(self, cur_node): + if cur_node.layer_type == "Conv2D" or cur_node.layer_type == "Dense": + return cur_node.activation_type != "linear" + + return False + + + def genActivationCall(self, input_var, output_var, activation_type): + header_str = self.genNodeHeader(output_var, 1) + inst_str = header_str + + func_name = "__visc__tensor_" + if activation_type 
== "tanh": + func_name += "tanh" + + if activation_type == "relu": + func_name += "relu" + + if activation_type == "softmax": + func_name += "softmax" + + inst_str += " void* r = " + func_name + "(t1); \n" + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + return inst_str + + + def genNodeHeader(self, var_name, num_params): + node_header_str = "void " + var_name + "_node(" + for i in range(num_params): + node_header_str += "void* t" + str(i + 1) + ", " + node_header_str += "size_t bytes_t" + str(i + 1) + if i < num_params - 1: + node_header_str += ", " + + node_header_str += ") { \n" + node_header_str += " __visc__hint(visc::CUDNN_TARGET); \n" + node_header_str += " __visc__attributes(" + str(num_params) + ", " + + for i in range(num_params): + node_header_str += "t" + str(i + 1) + if i < num_params - 1: + node_header_str += ", " + + node_header_str += ", 0); \n\n" + return node_header_str + + + def genNodeFooter(self, num_params): + node_footer_str = " __visc__return(" + node_footer_str += str(num_params) + ", " + node_footer_str += "r, " + node_footer_str += "(size_t) 0); \n" + node_footer_str += "}\n\n" + + return node_footer_str + + + + def genHpvmNodeEdges(self, out_var_name, input_var_name, input_var_name2): + + print ("input_var_name2 = ", input_var_name2) + print ("input_var_name = ", input_var_name) + + hpvm_edge_str = "\n void* " + out_var_name + " = " + hpvm_edge_str += "__visc__createNodeND(0, " + out_var_name + "_node); \n\n" + + if input_var_name in self.filter_names: + input_index = self.filter_names[input_var_name] + index1 = input_index * 2 + index2 = index1 + 1 + hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 0, 0); \n" + hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index2) + ", 1, 0); \n" + + elif input_var_name in self.hpvm_node_names: + #input_index = self.output_map[input_var_name] + hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 0, 0, 0); \n" + hpvm_edge_str += " __visc__edge(" + input_var_name + ", " + out_var_name + ", 1, 1, 1, 0); \n" + + + if input_var_name2 in self.filter_names: + input_index = self.filter_names[input_var_name2] + index1 = input_index * 2 + index2 = index1 + 1 + hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index1) + ", 2, 0); \n" + hpvm_edge_str += " __visc__bindIn(" + out_var_name + ", " + str(index2) + ", 3, 0); \n" + + elif input_var_name2 in self.hpvm_node_names: + #input_index = self.output_map[input_var_name2] + hpvm_edge_str += " __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 0, 2, 0); \n" + hpvm_edge_str += " __visc__edge(" + input_var_name2 + ", " + out_var_name + ", 1, 1, 3, 0); \n" + + + return hpvm_edge_str + + + + def genDenseNode(self, cur_node): + out_var_name = self.getVariableName(cur_node) + + header_str = self.genNodeHeader(out_var_name, 2) + inst_str = header_str + inst_str += " void *r = __visc__tensor_mul(t1, t2); \n" + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + #self.genHpvmNodeVar(out_var_name) + input_var_name = self.getSingleInputName(cur_node) + weight_name = cur_node.layer_name + "_w" + + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, weight_name) + + self.node_str += inst_str + + + + + def genConvNode(self, cur_node): + #input_var_name = self.getSingleInputName(cur_node) + out_var_name = self.getVariableName(cur_node) + + header_str = self.genNodeHeader(out_var_name, 2) + inst_str = header_str + + weights = cur_node.weights + strides = 
cur_node.strides + + padding = 0 + if cur_node.padding.strip() == "valid": + padding = 0 + else: + padding = cur_node.padding + padding = int((weights.shape[0] - 1) / 2) + + prev_padding = self.getPrevLayerPadding(cur_node) + if prev_padding != None: + # FIXME: currently only supporting symmetric padding + padding = prev_padding[0][0] + + inst_str += " void *r = __visc__tensor_convolution(t1, t2, " + inst_str += str(padding) + ", " + inst_str += str(padding) + ", " + inst_str += str(strides[0]) + ", " + inst_str += str(strides[1]) + inst_str += "); \n" + + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + self.node_str += inst_str + + input_var_name = self.getSingleInputName(cur_node) + weight_name = cur_node.layer_name + "_w" + + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, weight_name) + + + + def genBiasNode(self, cur_node): + input_var_name = self.output_map[cur_node.layer_name] + out_var_name = self.getVariableName(cur_node) + + header_str = self.genNodeHeader(out_var_name, 2) + inst_str = header_str + inst_str += " void *r = __visc__tensor_add(t1, t2); \n" + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + self.node_str += inst_str + + weight_name = cur_node.layer_name + "_b" + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, weight_name) + + + + def genSubActivationNode(self, cur_node): + input_var_name = self.output_map[cur_node.layer_name] + out_var_name = self.getVariableName(cur_node) + activation_type = cur_node.activation_type + + inst_str = self.genActivationCall(input_var_name, out_var_name, activation_type) + + self.node_str += inst_str + + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, "") + + + + def genActivationNode(self, cur_node): + input_var_name = self.getSingleInputName(cur_node) + out_var_name = self.getVariableName(cur_node) + activation_type = cur_node.activation_type + + inst_str = self.genActivationCall(input_var_name, out_var_name, activation_type) + self.node_str += inst_str + + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, "") + + + def genAddNode(self, cur_node): + out_var_name = self.getVariableName(cur_node) + + header_str = self.genNodeHeader(out_var_name, 2) + inst_str = header_str + inst_str += " void *r = __visc__tensor_add(t1, t2); \n" + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + self.node_str += inst_str + + input_vars = self.getMultipleInputNames(cur_node) + self.root_str += self.genHpvmNodeEdges(out_var_name, input_vars[0], input_vars[1]) + + + + def genPoolNode(self, cur_node): + out_var_name = self.getVariableName(cur_node) + + header_str = self.genNodeHeader(out_var_name, 1) + inst_str = header_str + + pool_size = cur_node.pool_size + strides = cur_node.strides + # FIXME: Non-same padding is *NOT* currently supported + padding = 0 + pool_type = 0 + func_name = "" + + layer_type = cur_node.layer_type + if layer_type == "MaxPooling2D": + func_name = "__visc__tensor_pool_max" + if layer_type == "AveragePooling2D": + func_name = "__visc__tensor_pool_avg" + + inst_str += " void* r = " + func_name + "(t1, " + inst_str += str(pool_size[0]) + ", " + str(pool_size[1]) + ", " + inst_str += str(padding) + ", " + str(padding) + ", " + inst_str += str(strides[0]) + ", " + str(strides[1]) + "); \n" + + footer_str = self.genNodeFooter(2) + inst_str += footer_str + + self.node_str += inst_str + + input_var_name = self.getSingleInputName(cur_node) + self.root_str += self.genHpvmNodeEdges(out_var_name, input_var_name, "") + 
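+  # Illustrative shape of one emitted HPVM leaf node (the variable name and the
+  # padding/stride values below are placeholders, not taken from any real model):
+  #
+  #   void var_0_node(void* t1, size_t bytes_t1, void* t2, size_t bytes_t2) {
+  #     __visc__hint(visc::CUDNN_TARGET);
+  #     __visc__attributes(2, t1, t2, 0);
+  #     void *r = __visc__tensor_convolution(t1, t2, 1, 1, 1, 1);
+  #     __visc__return(2, r, (size_t) 0);
+  #   }
+  #
+  # genHpvmNodeEdges() then instantiates the node in root() with
+  # __visc__createNodeND and wires its inputs either from root() arguments
+  # (__visc__bindIn) or from a preceding node's output (__visc__edge).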
+ + # Checks for call-type and redirects to functions that generate HPVM nodes + def genHpvmNodes(self, cur_node): + + layer_type = cur_node.layer_type + + if layer_type == "Conv2D": + self.genConvNode(cur_node) + + if layer_type == "Dense": + self.genDenseNode(cur_node) + + if self.hasBiasAdd(cur_node): + self.genBiasNode(cur_node) + + if self.hasActivation(cur_node): + self.genSubActivationNode(cur_node) + + if layer_type == "Activation": + self.genActivationNode(cur_node) + + if layer_type == "Add": + self.genAddNode(cur_node) + + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": + self.genPoolNode(cur_node) + + + + + def codegenNode(self, dfg, cur_node, visited_nodes): + + # Skip visited nodes + if cur_node.layer_name in visited_nodes: + return + + if dfg.predVisited(cur_node, visited_nodes): + visited_nodes[cur_node.layer_name] = True + + self.genHpvmNodes(cur_node) + # Invoking traversal on outbound nodes + for output_node in cur_node.outputs: + self.codegenNode(dfg, output_node, visited_nodes) + + + + # Print the DFG in reverse postorder + def codegen(self, dfg): + print ("\n\n ****** Codegen for ApproxHPVM DFG Representation ******* \n\n") + visited_nodes = {} + # Starting traversal at the DFG root node + self.codegenNode(dfg, dfg.root_node, visited_nodes) + + + + + def genFileHeader(self): + headers = "\n#include <stdio.h> \n" + headers += "#include <stdlib.h> \n" + headers += "#include <unistd.h> \n" + headers += "#include <fcntl.h> \n" + headers += "#include <sys/stat.h> \n" + headers += "#include <cstring> \n" + + headers += "#include <visc.h> \n" + headers += "#include <tensorTypes.h> \n" + headers += "#include <tensorUtils.h> \n\n" + + self.file_header_str = headers + + + + def genRootNodeHeader(self): + root_signature = "void root(" + index = 0 + for f_name in self.filter_names: + if index > 0: + root_signature += "\t " + self.filter_names[f_name] = index + root_signature += "void* " + f_name + ", " + root_signature += "size_t " + f_name + "_bytes" + if index < len(self.filter_names) - 1: + root_signature += ", \n" + index += 1 + + root_signature += "){ \n\n" + root_signature += "\n __visc__hint(visc::CPU_TARGET); \n" + root_signature += " __visc__attributes(" + str(len(self.filter_names)) + ", " + + index = 0 + for f_name in self.filter_names: + root_signature += f_name + if index < len(self.filter_names) - 1: + root_signature += ", " + index += 1 + + root_signature += ", 0); \n\n" + + self.root_str += root_signature + + + def genRootNodeFooter(self): + last_node = self.dfg.last_node + output_var = self.output_map[last_node.layer_name] + + # Binding output of last DFG node to the Root Node output + root_footer_str = "\n __visc__bindOut(" + output_var + ", 0, 0, 0); \n" + root_footer_str += " __visc__bindOut(" + output_var + ", 1, 1, 0); \n" + root_footer_str += "\n}\n\n" + + self.root_str += root_footer_str + + + + def genRootStructure(self): + root_struct = "" + root_struct += "struct ret_t {\n" + root_struct += " void* tensor; \n" + root_struct += " size_t bytes; \n" + root_struct += "}; \n\n" + + root_struct += "typedef struct __attribute__((__packed__)) {\n" + for f_name in self.filter_names: + root_struct += " void* " + f_name + "; \n" + root_struct += " size_t " + f_name + "_bytes; \n" + + root_struct += "\n struct ret_t r; \n" + root_struct += "}\nRootIn;\n\n" + + self.root_struct_str += root_struct + + + + def genMainFunction(self, test_data): + main_func_str = "int main(){ \n\n" + main_func_str += self.weight_str + main_func_str += self.input_str 
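+    # The generated main() below initializes HPVM, packs every weight/input
+    # pointer into the RootIn struct, launches the root DFG node, waits for it,
+    # then requests the result tensor back and computes accuracy.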
+ main_func_str += "\n__visc__init(); \n" + main_func_str += "RootIn* args = static_cast<RootIn*>(malloc(sizeof(RootIn))); \n\n" + + for f_name in self.filter_names: + main_func_str += "args->" + f_name + " = " + f_name + "; \n" + main_func_str += "args->" + f_name + "_bytes = 0; \n" + + main_func_str += "\nvoid* dfg = __visc__launch(0, root, (void*) args); \n\n" + main_func_str += "__visc__wait(dfg); \n\n" + + main_func_str += "void *result = static_cast<RootIn*>(args)->input; \n" + main_func_str += "hpvm_request_tensor(result, 0); \n\n" + main_func_str += "__visc__cleanup(); \n " + + main_func_str += "computeAccuracy2(labels, " + str(len(test_data)) + ", result); \n" + main_func_str += "return 0; \n\n" + main_func_str += "} \n" + + self.main_func_str += main_func_str + + + + + def generateSourceProgram(self, dir_prefix): + #print (self.node_str) + #print (self.root_str) + #print (self.root_struct_str) + #print (self.main_func_str) + + program_str = self.file_header_str + self.node_str + self.root_str + program_str += self.root_struct_str + self.main_func_str + + print (program_str) + + f = open(dir_prefix + "/approxhpvm_src.cc", "w+") + f.write(program_str) + f.close() + + + + def translate(self, model, weights_dir, test_data): + + self.genFileHeader() + self.genRootNodeHeader() + self.genRootStructure() + self.codegen(self.dfg) + self.genRootNodeFooter() + self.genMainFunction(test_data) + + # dump generated program string to source file + self.generateSourceProgram(weights_dir) + + + diff --git a/llvm/projects/keras/frontend/promise_translator.py b/llvm/projects/keras/frontend/promise_translator.py new file mode 100644 index 0000000000000000000000000000000000000000..a4843c529136980df82f22e16d9e4dc2db878c5d --- /dev/null +++ b/llvm/projects/keras/frontend/promise_translator.py @@ -0,0 +1,1079 @@ + + +import numpy as np +import sys +from keras import backend as K + + + +class State: + + def __init__(self): + self.ops = [] + self.op_string = "" + self.num_ops = 0 + + def clear(self): + self.ops = [] + self.op_string = "" + self.num_ops = 0 + + def add(self, cur_node, layer_type): + self.ops.append(cur_node) + if self.op_string != "": + self.op_string += "_" + + self.op_string += layer_type + self.num_ops += 1 + + + def getFirstOp(self): + return self.ops[0] + + def getLastOp(self): + return self.ops[self.num_ops - 1] + + def getDenseOp(self): + for i in range(self.num_ops): + if self.ops[i].layer_type == "Dense": + return self.ops[i] + return None # Should be unreachable + + + def getConvOp(self): + for i in range(self.num_ops): + if self.ops[i].layer_type == "Conv2D": + return self.ops[i] + return None # Should be unreachable + + + def getPoolOp(self): + for i in range(self.num_ops): + layer_type = self.ops[i].layer_type + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": + return self.ops[i] + return None # Should be unreachable + + + def getPadOp(self): + for i in range(self.num_ops): + layer_type = self.ops[i].layer_type + if layer_type == "ZeroPadding2D": + return self.ops[i] + return None # Should be unreachable + + + def getActivationID(self): + activation_type = 'linear' + for i in range(self.num_ops): + cur_op = self.ops[i] + layer_type = cur_op.layer_type + if layer_type == "Dense" or layer_type == "Conv2D" or layer_type == "Activation": + activation_type = cur_op.activation_type + + activation_id = -1; + if activation_type == "tanh": + activation_id = 0 + if activation_type == "relu": + activation_id = 1 + + return activation_id + + + def isDense(self): + if 
"dense" in self.op_string: + return True + return False + + def isConv(self): + if "conv" in self.op_string: + return True + return False + + def isPool(self): + if "pool" in self.op_string and self.num_ops == 1: + return True + return False + + + def isActivation(self): + if "activation" in self.op_string and self.num_ops == 1: + return True + return False + + + def getPadding(self): + padding_op = self.getPadOp() + prev_padding = 0 + if padding_op is not None: + prev_padding = padding_op.padding[0][0] + + conv_op = self.getConvOp() + if conv_op is None: + print ("ERROR: Conv Op not found") + sys.exit(0) + + K = conv_op.weights.shape[0] + padding = 0 + padding_type = conv_op.padding.strip() + if padding_type == "valid": + padding = 0 + else: + padding = int((K - 1) / 2) + + padding = padding + prev_padding + + return padding + + + def getPoolInfo(self): + pool_op = self.getPoolOp() + if pool_op is None: + return -1, [0,0] + + pool_id = -1 + layer_type = pool_op.layer_type + if layer_type == "MaxPooling2D": + pool_id = 0 + if layer_type == "AveragePooling2D": + pool_id = 1 + + pool_size = pool_op.pool_size + return pool_id, pool_size + + + def getStrides(self): + conv_op = self.getConvOp() + strides = conv_op.strides + return strides + + + + +class PromiseRtTranslator: + + # NOTE: weight_str can be optinally passed + def __init__(self, dfg, weight_str = ""): + self.dfg = dfg + self.output_map = {} + self.visited_nodes = {} + self.counter = 0 + self.weight_str = weight_str + self.program_str = "" + self.swing_value = 9 # FP32 + self.quant_ranges = {} + # Used to generate PromiseSim Info + self.layer_str = "" + self.layer_size_str = "" + self.layer_input_sizes = {} + self.unique_op_types = {} + self.batch_size = 0 + + + def getVariableName(self, cur_node): + + output_var_name = "var_" + str(self.counter) + self.counter += 1 + self.output_map[cur_node.layer_name] = output_var_name + + return output_var_name + + + def isSkipLayer(self, layer_type): + + skip_layers = {} + skip_layers["Flatten"] = 0 + skip_layers["Dropout"] = 0 + skip_layers["ZeroPadding2D"] = 0 + + if layer_type in skip_layers: + return True + else: + return False + + + def isForwardLayer(self, layer_type): + + skip_layers = {} + skip_layers["Input"] = 0 + skip_layers["InputLayer"] = 0 + skip_layers["Flatten"] = 0 + skip_layers["Dropout"] = 0 + if layer_type in skip_layers: + return True + else: + return False + + + + def appendLayerSizeStr(self, promise_layer_type, state): + + central_op = None + if promise_layer_type == "Conv": + central_op = state.getConvOp() + if promise_layer_type == "FC": + central_op = state.getDenseOp() + + first_op = state.getFirstOp() + layer_name = first_op.layer_name + + print("** layer_name = ", layer_name) + + unique_id = 0 + if promise_layer_type not in self.unique_op_types: + self.unique_op_types[promise_layer_type] = 1 + unique_id = 1 + else: + unique_id = self.unique_op_types[promise_layer_type] + unique_id += 1 + self.unique_op_types[promise_layer_type] = unique_id + + unique_layer_name = promise_layer_type + str(unique_id) + if promise_layer_type == "Conv" or promise_layer_type == "FC": + self.layer_size_str += unique_layer_name + "," + else: + # Handling single tensor ops - NO Promise layer + self.layer_size_str += "#tensor" + unique_layer_name + "\n" + return + + + weights_shape = central_op.weights.shape + input_size = self.layer_input_sizes[layer_name] + #print ("layer_name = ", layer_name, " input_size = ", input_size) + N = self.batch_size + C = input_size[1] + + if str(C) == "?": + C 
= weights_shape[0] + + self.layer_size_str += str(N) + "," + str(C) + "," + + if promise_layer_type == "Conv": + H = input_size[2] + W = input_size[3] + self.layer_size_str += str(H) + "," + str(W) + "," + + + H = weights_shape[0] + W = weights_shape[1] + + if promise_layer_type == "Conv": + N = weights_shape[3] + C = weights_shape[2] + self.layer_size_str += str(N) + "," + str(C) + "," + + self.layer_size_str += str(H) + "," + str(W) + + self.layer_size_str += "\n" + + + + + + def appendLayerString(self, promise_layer_type, state): + + layer_str = "" + for op in state.ops: + op_type = op.layer_type + if op_type == "Conv2D": + layer_str += "conv " + if op.use_bias: + layer_str += "add " + if op.activation_type != "linear": + layer_str += "activation " + + if op_type == "Dense": + layer_str += "dense " + if op.use_bias: + layer_str += "add " + if op.activation_type != "linear": + layer_str += "activation " + + if "Pooling" in op_type: + layer_str += "pool " + + if op_type == "Add": + layer_str += "add " + + if op_type == "Activation": + layer_str += "activation " + + layer_str += "\n" + + self.layer_str += layer_str + + self.appendLayerSizeStr(promise_layer_type, state) + + + + + # NOTE: returns the previous DFG node ignoring "Flatten", "Dropout" Layers + def getPrevActiveLayer(self, cur_node): + + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node.inputs[0]) + return cur_node + else: + return cur_node + + + # Retrieve input name of the previous layer + def getInputLayerName(self, cur_node): + + print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return "input" + + print ("Input_type = ", cur_node.inputs[0].layer_type) + + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node) + + if cur_node.inputs[0].layer_type == "InputLayer": + return "input" + + # get input to the layer + input_node_name = cur_node.inputs[0].layer_name # get the input layer ID + + return input_node_name + + + + # Retrieve input name of the previous layer + def getSingleInputName(self, cur_node): + + print (cur_node.layer_name) + # Assumption: If no inputs, the previous layer must be input layer + if len(cur_node.inputs) == 0: + return "input" + + print ("Input_type = ", cur_node.inputs[0].layer_type) + + pred_layer_type = cur_node.inputs[0].layer_type + # FIXME: Assuming the 'inference' phase - hence skipping Dropout + #if pred_layer_type == "Flatten" or pred_layer_type == "Dropout": + if self.isSkipLayer(pred_layer_type): + cur_node = self.getPrevActiveLayer(cur_node) + + if cur_node.inputs[0].layer_type == "InputLayer": + return "input" + + # get input to the layer + input_node_name = cur_node.inputs[0].layer_name # get the input layer ID + + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return input_var_name + + + + # Used to retrieve inputs for "add" operation with multiple inputs + def getMultipleInputNames(self, cur_node): + + var_names = [] + for i in 
range(len(cur_node.inputs)): + # get input to the layer + input_node_name = cur_node.inputs[i].layer_name # get the input layer ID + + input_var_name = "" + if input_node_name in self.output_map: + input_var_name = self.output_map[input_node_name] + var_names.append(input_var_name) + else: + print ("Input Var not found - Aborting....") + sys.exit(0) + + return var_names + + + + def getWeightNames(self, cur_node): + + layer_name = cur_node.layer_name + w_name = layer_name + "_w" + b_name = layer_name + "_b" + # If Conv has no bias Add operation + if cur_node.use_bias == False: + b_name = "NULL" + + return w_name, b_name + + + def getWeightRange(self, cur_node): + + layer_type = cur_node.layer_type + if layer_type != "Dense" and layer_type != "Conv2D": + print ("ERROR: layer_type = ", layer_type , " does not have weights ") + sys.exit(0) + + weights = cur_node.weights + min_val = np.amin(weights) + max_val = np.amax(weights) + + return min_val, max_val + + + def getBiasRange(self, cur_node): + + layer_type = cur_node.layer_type + if layer_type != "Dense" and layer_type != "Conv2D": + print ("ERROR: layer_type = ", layer_type , " does not have weights ") + sys.exit(0) + + if cur_node.use_bias == False: + return 0, 0 + + bias_weights = cur_node.bias_weights + min_val = np.amin(bias_weights) + max_val = np.amax(bias_weights) + + return min_val, max_val + + + # Returns the output value ranges for the input and output to a PROMISE layer + def getQuantRange(self, state): + + first_op = state.getFirstOp() + last_op = state.getLastOp() + + prev_layer_name = self.getInputLayerName(first_op) + cur_layer_name = last_op.layer_name + + print ("prev_layer_name ", prev_layer_name , " cur_layer_name = ", cur_layer_name) + + if prev_layer_name not in self.quant_ranges or cur_layer_name not in self.quant_ranges: + print ("ERROR: Layer_name = ", prev_layer_name ," or ", cur_layer_name, " not found in quant_range") + sys.exit(0) + + input_quant_range = self.quant_ranges[prev_layer_name] + output_quant_range = self.quant_ranges[cur_layer_name] + + print (input_quant_range) + print (output_quant_range) + + return input_quant_range, output_quant_range + + + + def genDenseLayer(self, state): + + print ("\n\n Layer = ", state.op_string, "\n\n") + + first_op = state.getFirstOp() + dense_op = state.getDenseOp() + last_op = state.getLastOp() + + input_var = self.getSingleInputName(first_op) + output_var = self.getVariableName(last_op) + + w_name, b_name = self.getWeightNames(dense_op) + w_min, w_max = self.getWeightRange(dense_op) + b_min, b_max = self.getBiasRange(dense_op) + + activation_id = state.getActivationID() + + # NOTE: retrieve the quantization ranges for inputs and ouputs + input_quant_range, output_quant_range = self.getQuantRange(state) + + promise_layer_str = "void* " + output_var + " = FCLayer_PROMISE(" + input_var + ", " + promise_layer_str += str(input_quant_range[0]) + ", " + str(input_quant_range[1]) + ", " + promise_layer_str += w_name + ", " + str(w_min) + ", " + str(w_max) + ", " + promise_layer_str += b_name + ", " + str(b_min) + ", " + str(b_max) + ", " + promise_layer_str += str(activation_id) + ", " + promise_layer_str += str(output_quant_range[0]) + ", " + str(output_quant_range[1]) + ", " + promise_layer_str += str(self.swing_value) + promise_layer_str += "); \n" + + print (promise_layer_str) + + self.program_str += promise_layer_str + + + self.appendLayerString("FC", state) + + state.clear() + + + + def genConvLayer(self, state): + + print ("\n\n Layer = ", state.op_string, "\n\n") + + 
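+    # Emit a single fused ConvLayer_PROMISE(...) call covering the ops collected
+    # in 'state' (conv + optional bias add + activation + pooling), together with
+    # the weight/bias value ranges and the input/output quantization ranges.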
first_op = state.getFirstOp() + conv_op = state.getConvOp() + last_op = state.getLastOp() + + input_var = self.getSingleInputName(first_op) + output_var = self.getVariableName(last_op) + + w_name, b_name = self.getWeightNames(conv_op) + w_min, w_max = self.getWeightRange(conv_op) + b_min, b_max = self.getBiasRange(conv_op) + + activation_id = state.getActivationID() + padding = state.getPadding() + pool_id, pool_size = state.getPoolInfo() + strides = state.getStrides() + + # NOTE: retrieve the quantization ranges for inputs and ouputs + input_quant_range, output_quant_range = self.getQuantRange(state) + + # NOTE: Assuming symmetric K*K pool size + promise_layer_str = "void* " + output_var + " = ConvLayer_PROMISE(" + input_var + ", " + promise_layer_str += str(input_quant_range[0]) + ", " + str(input_quant_range[1]) + ", " + promise_layer_str += w_name + ", " + str(w_min) + ", " + str(w_max) + ", " + promise_layer_str += b_name + ", " + str(b_min) + ", " + str(b_max) + ", " + promise_layer_str += str(padding) + ", " + str(padding) + ", " + promise_layer_str += str(strides[0]) + ", " + str(strides[1]) + ", " + promise_layer_str += str(pool_id) + ", " + str(pool_size[0]) + ", " + promise_layer_str += str(activation_id) + ", " + promise_layer_str += str(output_quant_range[0]) + ", " + str(output_quant_range[1]) + ", " + promise_layer_str += str(self.swing_value) + promise_layer_str += "); \n" + + print (promise_layer_str) + + self.program_str += promise_layer_str + + + self.appendLayerString("Conv", state) + + state.clear() + + + def genSoftmaxLayer(self, state): + print ("\n\n Layer = ", state.op_string, "\n\n") + + first_op = state.getFirstOp() + last_op = state.getLastOp() + + input_var = self.getSingleInputName(first_op) + output_var = self.getVariableName(last_op) + + promise_layer_str = "void* " + output_var + " = tensorSoftmax(" + input_var + "); \n" + print (promise_layer_str) + + self.program_str += promise_layer_str + + state.clear() + + + def genAddLayer(self, state): + print ("\n\n Layer = ", state.op_string, "\n\n") + + first_op = state.getFirstOp() + last_op = state.getLastOp() + + input_vars = self.getMultipleInputNames(first_op) + output_var = self.getVariableName(last_op) + + promise_layer_str = "void* " + output_var + " = tensorAdd(" + input_vars[0] + promise_layer_str += ", " + input_vars[1] + "); \n" + print (promise_layer_str) + + self.program_str += promise_layer_str + + + self.appendLayerString("Add", state) + + state.clear() + + + + + def genActivationLayer(self, state): + print ("\n\n Layer = ", state.op_string, "\n\n") + + first_op = state.getFirstOp() + input_var = self.getSingleInputName(first_op) + output_var = self.getVariableName(first_op) + + activation_type = first_op.activation_type + + func_name = "" + if activation_type == "tanh": + func_name = "Tanh" + + if activation_type == "relu": + func_name = "Relu" + + inst_str = "void* " + output_var + " = " + inst_str += "tensor" + func_name + "(" + input_var + "); \n" + + self.program_str += inst_str + + + self.appendLayerString(func_name, state) + + state.clear() + + + # FIXME: Only supporting single AveragePooling layers + def genPoolLayer(self, state): + print ("\n\n Layer = ", state.op_string, "\n\n") + + # For single pool layer should be all same + pool_op = state.getPoolOp() + + input_var = self.getSingleInputName(pool_op) + output_var = self.getVariableName(pool_op) + + pool_size = pool_op.pool_size + strides = pool_op.strides + # FIXME: Same padding is *NOT* currently supported + padding = 0 + pool_type = 0 
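+    # Map the Keras pooling layer onto the runtime's pool-type code
+    # (0 = max pooling, 1 = average pooling) expected by tensorPooling().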
+ + layer_type = pool_op.layer_type + if layer_type == "MaxPooling2D": + pool_type = "0" + if layer_type == "AveragePooling2D": + pool_type = "1" + + # tensorPooling(input, pool_type, pool_h, pool_w, v_pad, h_pad, v_stride, h_stride) + inst_str = "void* " + output_var + " = " + inst_str += "tensorPooling(" + input_var + "," + pool_type + "," + str(pool_size[0]) + "," + str(pool_size[1]) + inst_str += "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1]) + inst_str += "); \n" + self.program_str += inst_str + + + self.appendLayerString("Pooling", state) + + state.clear() + + + + + + def genPreviousLayer(self, state): + + if state.isDense(): + self.genDenseLayer(state) + + elif state.isConv(): + self.genConvLayer(state) + + elif state.isPool(): + self.genPoolLayer(state) + + elif state.isActivation(): + self.genActivationLayer(state) + + + def shouldVisit(self, cur_node): + layer_name = cur_node.layer_name + # NOTE: visit a node if not already visited and all predecessors are visited + if self.dfg.predVisited(cur_node, self.visited_nodes) and layer_name not in self.visited_nodes: + return True + else: + return False + + + def handle_padding(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print (layer_name) + self.visited_nodes[layer_name] = True + + self.genPreviousLayer(state) + + # Appending conv to state + state.add(cur_node, "padding") + + self.traverseSuccessors(cur_node, state) + + + + def handle_conv(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print ("handle_conv", layer_name) + self.visited_nodes[layer_name] = True + + self.genPreviousLayer(state) + + # Appending conv to state + state.add(cur_node, "conv") + + self.traverseSuccessors(cur_node, state) + + + + def handle_add(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print (layer_name) + self.visited_nodes[layer_name] = True + + self.genPreviousLayer(state) + + # Appending conv to state + state.add(cur_node, "add") + + self.genAddLayer(state) + + self.traverseSuccessors(cur_node, state) + + + + def handle_activation(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print (layer_name) + self.visited_nodes[layer_name] = True + + # NOTE: If end of DNN + if cur_node.activation_type == "softmax": + self.genPreviousLayer(state) + state.add(cur_node, "activation") + self.genSoftmaxLayer(state) + # NOTE: return when observed end of DNN (softmax) + return + + # Appending activation to state + state.add(cur_node, "activation") + + self.traverseSuccessors(cur_node, state) + + + def handle_dense(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print (layer_name) + self.visited_nodes[layer_name] = True + + self.genPreviousLayer(state) + + state.add(cur_node, "dense") + + self.traverseSuccessors(cur_node, state) + + + def handle_pooling(self, cur_node, state): + if not self.shouldVisit(cur_node): + return + + layer_name = cur_node.layer_name + print (layer_name) + self.visited_nodes[layer_name] = True + + layer_type = cur_node.layer_type + if layer_type == "AveragePooling2D": + self.genPreviousLayer(state) + + state.add(cur_node, "pool") + + # NOTE: Will only generate pool layer if it is a standalone Pool (with no convolution) + # self.genPreviousLayer(state) + + self.traverseSuccessors(cur_node, state) + + + + 
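+  # Dispatch/fusion driver: handleLayers() below routes every DFG node to one of
+  # the handle_* methods above. Handlers that begin a new PROMISE layer (padding,
+  # conv, dense, add, average-pool, final softmax) first flush the pending ops via
+  # genPreviousLayer(); activations and max-pool ops are instead appended to the
+  # running State, so that, e.g., Conv2D -> Activation -> MaxPooling2D is emitted
+  # as a single fused ConvLayer_PROMISE call.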
+ + def handleLayers(self, output_node, state): + + layer_type = output_node.layer_type + print ("layer_type", layer_type) + + if layer_type == "ZeroPadding2D": + self.handle_padding(output_node, state) + + if layer_type == "Conv2D": + self.handle_conv(output_node, state) + + if layer_type == "Dense": + self.handle_dense(output_node, state) + + if layer_type == "Activation": + self.handle_activation(output_node, state) + + if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D": + self.handle_pooling(output_node, state) + + if layer_type == "Add": + self.handle_add(output_node, state) + + if(self.isForwardLayer(layer_type)): + layer_name = output_node.layer_name + print ("NOTE: Skippping = ", layer_name) + self.visited_nodes[layer_name] = True + self.traverseSuccessors(output_node, state) + + + + def traverseSuccessors(self, cur_node, state): + + for output_node in cur_node.outputs: + self.handleLayers(output_node, state) + + + + def findQuantizeRanges(self, model, x_test): + + inp = model.input # input placeholder + outputs = [layer.output for layer in model.layers] # all layer outputs + functor = K.function([inp, K.learning_phase()], outputs ) # evaluation function + + # Testing + layer_outs = functor([x_test, 1.]) + + # NOTE: Saving quant ranges for input + min_val = np.amin(x_test) + max_val = np.amax(x_test) + self.quant_ranges["input"] = (min_val, max_val) + + ind = 0 + for layer_out in layer_outs: + layer_name = model.layers[ind].name + print ("layer_name = ", layer_name) + #min_val = np.amin(layer_out) + #max_val = np.amax(layer_out) + min_val = np.percentile(layer_out, 0.5) + max_val = np.percentile(layer_out, 99.5) + print ("min_val = ", min_val, " max_val = ", max_val) + + self.quant_ranges[layer_name] = (min_val, max_val) + ind += 1 + + + + def findLayerInputSizes(self, model, x_test): + + self.batch_size = len(x_test) + for layer in model.layers: + layer_type = layer.__class__.__name__ + if layer_type == "InputLayer" or layer_type == "Add": + continue + + layer_name = layer.name + print ("layer_name = ", layer_name) + print ("layer_shape = ", layer.input.shape) + self.layer_input_sizes[layer_name] = layer.input.shape + + #inp = model.input # input placeholder + #inputs = [] + # functor = K.function([inp, K.learning_phase()], outputs ) # evaluation function + # functor2 = K.function([inp, K.learning_phase()], inputs ) # evaluation function + # Testing + # layer_outs = functor([x_test, 1.]) + # layer_inputs = functor2([x_test, 1.]) + + #index = 0 + #for layer_in in layer_inputs: + # print ("layer_in = ", layer_in.shape) + # layer_name = model.layers[index].name + # self.layer_input_sizes[layer_name] = layer_in.shape + # index += 1 + + + + def genExecutionLoop(self): + + exec_loop = "" + exec_loop += "int total_runs = 100; \n" + exec_loop += "for (int i = 0 ; i < total_runs; i++){ \n\n" + + return exec_loop + + + def endExecutionLoop(self): + + end_exec_loop = "\n}\n" + return end_exec_loop + + + def genBatchLoop(self, x_test): + + N = x_test.shape[0] + C = x_test.shape[1] + H = x_test.shape[2] + W = x_test.shape[3] + + loop_str = "" + loop_str += "\nstartMemTracking(); \n\n" + + loop_str += "int test_input_size = " + str(N) + "; \n" + loop_str += "int batch_size = " + str(N) + "; \n" + loop_str += "int batch_count = test_input_size / batch_size; \n" + loop_str += "float final_accuracy = 0.0; \n\n" + + loop_str += "for(int i = 0; i < batch_count; i++){ \n\n" + loop_str += "\n\n" + self.weight_str + "\n\n" + loop_str += "int start = i * batch_size; \n" + loop_str += 
"int end = (i + 1) * batch_size; \n" + + loop_str += "\nvoid* input = readInputBatch(input_path.c_str(),0,start,end," + loop_str += str(C) + "," + str(H) + "," + str(W) + "); \n\n" + + return loop_str + + + + def endBatchLoop(self): + + end_loop_str = "" + end_loop_str += "\nuint8_t* labels = readLabelsBatch(labels_path.c_str(),start,end); \n" + + last_node = self.dfg.last_node + output_var = self.output_map[last_node.layer_name] + accuracy_call = "\nfloat accuracy = computeAccuracy2(labels, batch_size, " + output_var + "); \n" + end_loop_str += accuracy_call + + #end_loop_str += "float accuracy = computeAccuracy2(labels, batch_size, var_60); " + end_loop_str += "final_accuracy += accuracy; \n" + end_loop_str += "freeBatchMemory(); \n " + end_loop_str += "\n}\n\n" + + end_loop_str += "final_accuracy = final_accuracy / batch_count; \n" + end_loop_str += "dumpFinalAccuracy(final_accuracy); \n\n" + + return end_loop_str + + + + + def genHeader(self): + + headers = "\n#include <stdio.h> \n" + headers += "#include <stdlib.h> \n" + headers += "#include <unistd.h> \n" + headers += "#include <fcntl.h> \n" + headers += "#include <sys/types.h> \n" + headers += "#include <sys/stat.h> \n" + headers += "#include <string.h> \n" + + headers += "#include \"../../../tensor_runtime/include/tensor_runtime.h\" \n" + headers += "#include \"../../include/utils.h\" \n\n" + + main_func = "int main(){ \n\n" + + initialization = "llvm_hpvm_initTensorRt(0); \n\n" + + # Merging into one header string + header_str = headers + header_str += main_func + header_str += initialization + + return header_str + + + + + def genFooter(self, test_data): + + footer_str = "" + if test_data is not None and self.dfg.last_node is not None: + last_node = self.dfg.last_node + output_var = self.output_map[last_node.layer_name] + #accuracy_call = "\ncomputeAccuracy2(labels," + str(len(test_data)) + "," + output_var + "); \n" + #footer_str += accuracy_call + + accuracy_call = "\ndumpExecutionAccuracies(); \n" + footer_str += accuracy_call + + destructors = "\nllvm_hpvm_cleanupTensorRt(); \n" + footer_str += destructors + + end_main = "\nreturn 0; \n\n}\n" + footer_str += end_main + + return footer_str + + + + def dumpLayerStr(self, dir_prefix): + + f = open(dir_prefix + "/layer_composition.txt", "w+") + f.write(self.layer_str) + f.close() + + f = open(dir_prefix + "/layers.txt", "w+") + f.write(self.layer_size_str) + f.close() + + + + def dumpProgramString(self, final_str, dir_prefix): + + f = open(dir_prefix + "/promise_src.cc", "w+") + f.write(final_str) + f.close() + + + def generateSourceProgram(self, weights_dir, x_test): + + print(self.program_str) + + final_str = "" + header_str = self.genHeader() + final_str += header_str + + exec_loop = self.genExecutionLoop() + final_str += exec_loop + + loop_str = self.genBatchLoop(x_test) + final_str += loop_str + + #final_str += "\n\n" + self.weight_str + "\n\n" + final_str += self.program_str + + end_loop_str = self.endBatchLoop() + final_str += end_loop_str + + end_exec_loop = self.endExecutionLoop() + final_str += end_exec_loop + + footer_str = self.genFooter(x_test) + final_str += footer_str + print (final_str) + + self.dumpProgramString(final_str, weights_dir) + + + + def translate(self, model, weights_dir, x_test): + + print ("\n\n\n **** PromiseRT Translator ****** \n\n\n") + root_node = self.dfg.root_node + state = State() + + self.findLayerInputSizes(model, x_test) + + self.findQuantizeRanges(model, x_test) + + #self.traverseSuccessors(root_node, state) + self.handleLayers(root_node, 
state) + + print ("\n *** Generated PROMISE Layers **** \n ") + + self.generateSourceProgram(weights_dir, x_test) + + self.dumpLayerStr(weights_dir) + + #print (self.layer_size_str) diff --git a/llvm/projects/keras/frontend/setup.py b/llvm/projects/keras/frontend/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..9da7193379454d1d5cdc1e63f6b436d3771e15a5 --- /dev/null +++ b/llvm/projects/keras/frontend/setup.py @@ -0,0 +1,12 @@ + +from setuptools import setup + +setup( + name='frontend', + version='1.0', + description='ApproxHPVM frontend modules', + author='Hashim', + author_email='hsharif3@illinois.edu', + packages=['frontend'], + install_requires=[], +) diff --git a/llvm/projects/keras/frontend/weight_utils.py b/llvm/projects/keras/frontend/weight_utils.py new file mode 100644 index 0000000000000000000000000000000000000000..a04827dd7be1bbeb81040fc9c9184567d1bfd579 --- /dev/null +++ b/llvm/projects/keras/frontend/weight_utils.py @@ -0,0 +1,125 @@ + +import numpy as np +import struct +import random + + +def dumpLabels(file_name, Y_test): + + f = open(file_name, "wb") + + labels_map = {} + for label in Y_test: + label_val = 0 + if len(Y_test.shape) > 1: + label_val = np.int8(label[0]) + else: + label_val = np.int8(label) + + if label_val not in labels_map: + labels_map[label_val] = 0 + labels_map[label_val] += 1 + + f.write(label_val) + + f.close() + + + +def dumpData(file_name, X_test): + + N = X_test.shape[0] + C = X_test.shape[1] + H = X_test.shape[2] + W = X_test.shape[3] + + print ("*DumpData") + print("-min_val = ", np.amin(X_test)) + print("-max_val = ", np.amax(X_test)) + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l])) + f.write(np.float32(val[0])) + + f.close() + + +def dumpConvWeights(file_name, weights, N, C, H, W): + + print (weights.shape) + print ("*DumpConvWeights") + print("-min_val = ", np.amin(weights)) + print("-max_val = ", np.amax(weights)) + + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + f.write(weights[k][l][j][i]) + + f.close() + + + +def dumpFcWeights(file_name, weights, H, W): + + print (weights.shape) + print ("*DumpFcWeights") + print("-min_val = ", np.amin(weights)) + print("-max_val = ", np.amax(weights)) + + + f = open(file_name, "wb") + for i in range(H): + for j in range(W): + f.write(weights[i][j]) + + f.close() + + + +def dumpFcBias(file_name, bias, W): + + print (bias.shape) + print ("*DumpFcBias") + print("-min_val = ", np.amin(bias)) + print("-max_val = ", np.amax(bias)) + + + f = open(file_name, "wb") + for i in range(W): + f.write(bias[i]) + + f.close() + + + +def dumpCalibrationData(file_name, X_train, labels_fname, train_labels): + + combined_list = [] + for i in range(len(X_train)): + tup = (X_train[i], train_labels[i]) + combined_list.append(tup) + + np.random.shuffle(combined_list) + #X_calibration = X_train[0:5000] + + data_list = [] + labels_list = [] + for i in range(5000): + tup = combined_list[i] + data_list.append(tup[0]) + labels_list.append(tup[1]) + + data_list = np.array(data_list) + labels_list = np.array(labels_list) + + dumpData(file_name, data_list) + dumpLabels(labels_fname, labels_list) + diff --git a/llvm/projects/keras/keras_environment.yml b/llvm/projects/keras/keras_environment.yml new file mode 100644 index 0000000000000000000000000000000000000000..1c9942f13d3f2a202c6bdc9cfcf689d32c5373b9 --- /dev/null 
+++ b/llvm/projects/keras/keras_environment.yml @@ -0,0 +1,331 @@ +name: prakalp_keras +channels: + - pytorch + - conda-forge + - defaults +dependencies: + - absl-py=0.6.1=py36_0 + - anaconda-project=0.8.2=py36_0 + - asn1crypto=0.24.0=py36_0 + - automat=0.7.0=py36_0 + - babel=2.6.0=py36_0 + - backports=1.0=py36_1 + - backports.os=0.1.1=py36_0 + - beautifulsoup4=4.6.3=py36_0 + - bkcharts=0.2=py36_0 + - blaze=0.11.3=py36_0 + - conda=4.5.11=py36_0 + - conda-env=2.6.0=1 + - contextlib2=0.5.5=py36_0 + - cycler=0.10.0=py36_0 + - dill=0.2.8.2=py36_0 + - docutils=0.14=py36_0 + - entrypoints=0.2.3=py36_2 + - et_xmlfile=1.0.1=py36_0 + - idna=2.7=py36_0 + - imageio=2.4.1=py36_0 + - importlib_metadata=0.6=py36_0 + - ipython_genutils=0.2.0=py36_0 + - isort=4.3.4=py36_0 + - jdcal=1.4=py36_0 + - jedi=0.13.1=py36_0 + - jinja2=2.10=py36_0 + - jmespath=0.9.3=py36_0 + - jsonschema=2.6.0=py36_0 + - jupyter_client=5.2.3=py36_0 + - jupyterlab=0.35.3=py36_0 + - keyring=16.0.0=py36_0 + - libgcc=7.2.0=h69d50b8_2 + - libgfortran=3.0.0=1 + - locket=0.2.0=py36_1 + - more-itertools=4.3.0=py36_0 + - nbconvert=5.3.1=py36_0 + - nbformat=4.4.0=py36_0 + - nose=1.3.7=py36_2 + - notebook=5.7.0=py36_0 + - numpydoc=0.8.0=py36_0 + - odo=0.5.1=py36_0 + - pathlib2=2.3.2=py36_0 + - pexpect=4.6.0=py36_0 + - pickleshare=0.7.5=py36_0 + - ply=3.11=py36_0 + - ptyprocess=0.6.0=py36_0 + - pycodestyle=2.4.0=py36_0 + - pygments=2.2.0=py36_0 + - pylint=2.1.1=py36_0 + - pyopenssl=18.0.0=py36_0 + - qtconsole=4.4.2=py36_0 + - requests=2.19.1=py36_0 + - s3transfer=0.1.13=py36_0 + - secretstorage=3.1.0=py36_0 + - setuptools=40.5.0=py36_0 + - singledispatch=3.4.0.3=py36_0 + - six=1.11.0=py36_1 + - snowballstemmer=1.2.1=py36_0 + - sortedcollections=1.0.1=py36_0 + - sphinx=1.8.1=py36_0 + - spyder=3.3.1=py36_1 + - sympy=1.3=py36_0 + - tblib=1.3.2=py36_0 + - termcolor=1.1.0=py36_1 + - terminado=0.8.1=py36_1 + - testpath=0.4.2=py36_0 + - torchvision=0.2.1=py36_0 + - traitlets=4.3.2=py36_0 + - typing=3.6.4=py36_0 + - unicodecsv=0.14.1=py36_0 + - urllib3=1.23=py36_0 + - wcwidth=0.1.7=py36_0 + - wheel=0.32.2=py36_0 + - widgetsnbextension=3.4.2=py36_0 + - xlwt=1.3.0=py36_0 + - _license=1.1=py36_1 + - _tflow_select=2.1.0=gpu + - alabaster=0.7.12=py36_0 + - anaconda-client=1.7.2=py36_0 + - anaconda=custom=py36hbbc8b67_0 + - anaconda-navigator=1.9.2=py36_0 + - appdirs=1.4.3=py36h28b3542_0 + - astor=0.7.1=py36_0 + - astroid=2.0.4=py36_0 + - astropy=3.0.5=py36h7b6447c_0 + - atomicwrites=1.2.1=py36_0 + - attrs=18.2.0=py36h28b3542_0 + - backcall=0.1.0=py36_0 + - backports.shutil_get_terminal_size=1.0.0=py36_2 + - bitarray=0.8.3=py36h14c3975_0 + - blas=1.0=mkl + - bleach=3.0.2=py36_0 + - blosc=1.14.4=hdbcaa40_0 + - bokeh=1.0.1=py36_0 + - boto=2.49.0=py36_0 + - boto3=1.9.35=py36_0 + - botocore=1.12.35=py36_0 + - bottleneck=1.2.1=py36h035aef0_1 + - bz2file=0.98=py36_1 + - bzip2=1.0.6=h14c3975_5 + - ca-certificates=2018.03.07=0 + - cairo=1.14.12=h8948797_3 + - certifi=2018.10.15=py36_0 + - cffi=1.11.5=py36he75722e_1 + - chardet=3.0.4=py36_1 + - chest=0.2.3=py36_1 + - click=7.0=py36_0 + - cloudpickle=0.6.1=py36_0 + - clyent=1.2.2=py36_1 + - colorama=0.4.0=py36_0 + - configobj=5.0.6=py36_1 + - constantly=15.1.0=py36h28b3542_0 + - cryptography=2.3.1=py36hc365091_0 + - cudatoolkit=9.0=h13b8566_0 + - cudnn=7.1.2=cuda9.0_0 + - cupti=9.0.176=0 + - curl=7.61.0=h84994c4_0 + - cython=0.29=py36he6710b0_0 + - cytoolz=0.9.0.1=py36h14c3975_1 + - dask=0.20.0=py36_0 + - dask-core=0.20.0=py36_0 + - datashape=0.5.4=py36_1 + - dbus=1.13.2=h714fa37_1 + - decorator=4.3.0=py36_0 + - 
defusedxml=0.5.0=py36_1 + - distributed=1.24.0=py36_0 + - expat=2.2.6=he6710b0_0 + - fastcache=1.0.2=py36h14c3975_2 + - filelock=3.0.10=py36_0 + - flask=1.0.2=py36_1 + - flask-cors=3.0.6=py36_0 + - fontconfig=2.13.0=h9420a91_0 + - freetype=2.9.1=h8a8886c_1 + - fribidi=1.0.5=h7b6447c_0 + - gast=0.2.0=py36_0 + - gensim=3.4.0=py36h14c3975_0 + - get_terminal_size=1.0.0=haa9412d_0 + - gevent=1.3.7=py36h7b6447c_1 + - glib=2.56.2=hd408876_0 + - glob2=0.6=py36_1 + - gmp=6.1.2=h6c8ec71_1 + - gmpy2=2.0.8=py36h10f8cd9_2 + - graphite2=1.3.12=h23475e2_2 + - greenlet=0.4.15=py36h7b6447c_0 + - grpcio=1.12.1=py36hdbcaa40_0 + - gst-plugins-base=1.14.0=hbbd80ab_1 + - gstreamer=1.14.0=hb453b48_1 + - h5py=2.8.0=py36h989c5e5_3 + - harfbuzz=1.8.8=hffaf4a1_0 + - hdf5=1.10.2=hba1933b_1 + - heapdict=1.0.0=py36_2 + - html5lib=1.0.1=py36_0 + - hyperlink=18.0.0=py36_0 + - icu=58.2=h9c2bf20_1 + - imagesize=1.1.0=py36_0 + - incremental=17.5.0=py36_0 + - intel-openmp=2019.0=118 + - ipykernel=5.1.0=py36h39e3cac_0 + - ipython=7.1.1=py36h39e3cac_0 + - ipywidgets=7.4.2=py36_0 + - itsdangerous=1.1.0=py36_0 + - jbig=2.1=hdba287a_0 + - jeepney=0.4=py36_0 + - jpeg=9b=h024ee3a_2 + - jupyter=1.0.0=py36_7 + - jupyter_console=6.0.0=py36_0 + - jupyter_core=4.4.0=py36_0 + - jupyterlab_launcher=0.13.1=py36_0 + - jupyterlab_server=0.2.0=py36_0 + - keras=2.1.6=py36_0 + - keras-applications=1.0.6=py36_0 + - keras-preprocessing=1.0.5=py36_0 + - kiwisolver=1.0.1=py36hf484d3e_0 + - lazy-object-proxy=1.3.1=py36h14c3975_2 + - libcurl=7.61.0=h1ad7b7a_0 + - libedit=3.1.20170329=h6b74fdf_2 + - libffi=3.2.1=hd88cf55_4 + - libgcc-ng=8.2.0=hdf63c60_1 + - libgfortran-ng=7.3.0=hdf63c60_0 + - libiconv=1.15=h63c8f33_5 + - libpng=1.6.35=hbc83047_0 + - libprotobuf=3.6.1=hd408876_0 + - libsodium=1.0.16=h1bed415_0 + - libssh2=1.8.0=h9cfc8f7_4 + - libstdcxx-ng=8.2.0=hdf63c60_1 + - libtiff=4.0.9=he85c1e1_2 + - libtool=2.4.6=h7b6447c_5 + - libuuid=1.0.3=h1bed415_2 + - libxcb=1.13=h1bed415_1 + - libxml2=2.9.8=h26e45fe_1 + - libxslt=1.1.32=h1312cb7_0 + - llvmlite=0.25.0=py36hd408876_0 + - lxml=4.2.5=py36hefd8a0e_0 + - lzo=2.10=h49e0be7_2 + - markdown=3.0.1=py36_0 + - markupsafe=1.0=py36h14c3975_1 + - matplotlib=3.0.1=py36h5429711_0 + - mccabe=0.6.1=py36_1 + - mistune=0.8.4=py36h7b6447c_0 + - mkl=2018.0.3=1 + - mkl-service=1.1.2=py36h90e4bf4_5 + - mkl_fft=1.0.6=py36h7dd41cf_0 + - mkl_random=1.0.1=py36h4414c95_1 + - mpc=1.1.0=h10f8cd9_1 + - mpfr=4.0.1=hdf1c602_3 + - mpmath=1.0.0=py36_2 + - msgpack-python=0.5.6=py36h6bb024c_1 + - multipledispatch=0.6.0=py36_0 + - navigator-updater=0.2.1=py36_0 + - nccl=1.3.5=cuda9.0_0 + - ncurses=6.1=hf484d3e_0 + - networkx=2.2=py36_1 + - ninja=1.8.2=py36h6bb024c_1 + - nltk=3.3.0=py36_0 + - numba=0.40.0=py36h962f231_0 + - numexpr=2.6.8=py36hd89afb7_0 + - numpy=1.15.3=py36h1d66e8a_0 + - numpy-base=1.15.3=py36h81de0dd_0 + - olefile=0.46=py36_0 + - openpyxl=2.5.9=py36_0 + - openssl=1.0.2p=h14c3975_0 + - packaging=18.0=py36_0 + - pandas=0.23.4=py36h04863e7_0 + - pandoc=2.2.3.2=0 + - pandocfilters=1.4.2=py36_1 + - pango=1.42.4=h049681c_0 + - parso=0.3.1=py36_0 + - partd=0.3.9=py36_0 + - patchelf=0.9=he6710b0_3 + - path.py=11.5.0=py36_0 + - patsy=0.5.1=py36_0 + - pcre=8.42=h439df22_0 + - pep8=1.7.1=py36_0 + - pillow=5.3.0=py36h34e0f95_0 + - pip=18.1=py36_0 + - pixman=0.34.0=hceecf20_3 + - pkginfo=1.4.2=py36_1 + - pluggy=0.8.0=py36_0 + - prometheus_client=0.4.2=py36_0 + - prompt_toolkit=2.0.7=py36_0 + - protobuf=3.6.1=py36he6710b0_0 + - psutil=5.4.8=py36h7b6447c_0 + - py=1.7.0=py36_0 + - pyasn1=0.4.4=py36h28b3542_0 + - 
pyasn1-modules=0.2.2=py36_0 + - pycosat=0.6.3=py36h14c3975_0 + - pycparser=2.19=py36_0 + - pycrypto=2.6.1=py36h14c3975_9 + - pycurl=7.43.0.2=py36hb7f436b_0 + - pyflakes=2.0.0=py36_0 + - pyhamcrest=1.9.0=py36_2 + - pyodbc=4.0.24=py36he6710b0_0 + - pyparsing=2.2.2=py36_0 + - pyqt=5.9.2=py36h05f1152_2 + - pysocks=1.6.8=py36_0 + - pytables=3.4.4=py36ha205bf6_0 + - pytest=3.9.3=py36_0 + - pytest-arraydiff=0.2=py36h39e3cac_0 + - pytest-astropy=0.4.0=py36_0 + - pytest-doctestplus=0.1.3=py36_0 + - pytest-openfiles=0.3.0=py36_0 + - pytest-remotedata=0.3.1=py36_0 + - python=3.6.6=h6e4f718_2 + - python-dateutil=2.7.5=py36_0 + - pytorch=0.4.1=py36ha74772b_0 + - pytz=2018.7=py36_0 + - pywavelets=1.0.1=py36hdd07704_0 + - pyyaml=3.13=py36h14c3975_0 + - pyzmq=17.1.2=py36h14c3975_0 + - qt=5.9.6=h8703b6f_2 + - qtawesome=0.5.2=py36_0 + - qtpy=1.5.2=py36_0 + - readline=7.0=h7b6447c_5 + - redis=5.0.0=h7b6447c_0 + - redis-py=2.10.6=py36_0 + - rope=0.11.0=py36_0 + - ruamel_yaml=0.15.46=py36h14c3975_0 + - scikit-image=0.14.0=py36hf484d3e_1 + - scikit-learn=0.20.0=py36h4989274_1 + - scipy=1.1.0=py36hfa4b5c9_1 + - seaborn=0.9.0=py36_0 + - send2trash=1.5.0=py36_0 + - service_identity=17.0.0=py36h28b3542_0 + - simplegeneric=0.8.1=py36_2 + - sip=4.19.8=py36hf484d3e_0 + - smart_open=1.7.1=py36_0 + - snappy=1.1.7=hbae5bb6_3 + - sockjs-tornado=1.0.6=py36_0 + - sortedcontainers=2.0.5=py36_0 + - sphinxcontrib=1.0=py36_1 + - sphinxcontrib-websupport=1.1.0=py36_1 + - spyder-kernels=0.2.6=py36_0 + - sqlalchemy=1.2.12=py36h7b6447c_0 + - sqlite=3.25.2=h7b6447c_0 + - statsmodels=0.9.0=py36h035aef0_0 + - tensorboard=1.11.0=py36hf484d3e_0 + - tensorflow=1.11.0=gpu_py36h4459f94_0 + - tensorflow-base=1.11.0=gpu_py36h8e0ae2d_0 + - tensorflow-gpu=1.11.0=h0d30ee6_0 + - tk=8.6.8=hbc83047_0 + - toolz=0.9.0=py36_0 + - tornado=5.1.1=py36h7b6447c_0 + - tqdm=4.28.1=py36h28b3542_0 + - twisted=18.9.0=py36h7b6447c_0 + - typed-ast=1.1.0=py36h14c3975_0 + - unixodbc=2.3.7=h14c3975_0 + - webencodings=0.5.1=py36_1 + - werkzeug=0.14.1=py36_0 + - wrapt=1.10.11=py36h14c3975_2 + - xlrd=1.1.0=py36_1 + - xlsxwriter=1.1.2=py36_0 + - xz=5.2.4=h14c3975_4 + - yaml=0.1.7=had09818_2 + - zeromq=4.2.5=hf484d3e_1 + - zict=0.1.3=py36_0 + - zlib=1.2.11=ha838bed_2 + - zope=1.0=py36_1 + - zope.interface=4.6.0=py36h7b6447c_0 + - cuda91=1.0=h4c16780_0 + - pip: + - msgpack==0.5.6 + - tables==3.4.4 + - torch==0.4.1 +prefix: /home/hsharif3/anaconda2/envs/prakalp_keras + diff --git a/llvm/projects/keras/setup.py b/llvm/projects/keras/setup.py new file mode 100644 index 0000000000000000000000000000000000000000..9da7193379454d1d5cdc1e63f6b436d3771e15a5 --- /dev/null +++ b/llvm/projects/keras/setup.py @@ -0,0 +1,12 @@ + +from setuptools import setup + +setup( + name='frontend', + version='1.0', + description='ApproxHPVM frontend modules', + author='Hashim', + author_email='hsharif3@illinois.edu', + packages=['frontend'], + install_requires=[], +) diff --git a/llvm/projects/keras/src/alexnet.py b/llvm/projects/keras/src/alexnet.py new file mode 100644 index 0000000000000000000000000000000000000000..b1f29bfc05da52cf1cded1d1f27761c33af3695b --- /dev/null +++ b/llvm/projects/keras/src/alexnet.py @@ -0,0 +1,173 @@ + +import numpy as np + +from keras.datasets import cifar10 +from keras.models import Sequential +from keras.layers.core import Dense, Dropout, Flatten, Activation +from keras.layers.convolutional import Conv2D +from keras.optimizers import Adam +from keras.layers.pooling import MaxPooling2D +from keras.utils.np_utils import to_categorical +from keras.preprocessing.image 
import ImageDataGenerator +from keras import backend as K +from keras import regularizers +from keras.callbacks import LearningRateScheduler +import sys +import struct +import keras +import numpy as np +import os +from frontend.approxhpvm_translator import translate_to_approxhpvm +from frontend.weight_utils import dumpCalibrationData + + + +def lr_schedule(epoch): + lrate = 0.001 + if epoch > 20: + lrate = 0.0005 + if epoch > 40: + lrate = 0.0003 + if epoch > 60: + lrate = 0.0001 + if epoch > 80: + lrate = 0.00005 + + return lrate + + + +def buildModel2(): + + activation_type = "tanh" + weight_decay = 1e-4 + + model = Sequential() + model.add(Conv2D(64, kernel_size=(11, 11), activation=activation_type, + input_shape=(3, 32, 32), padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.2)) + model.add(Conv2D(192, kernel_size=(5, 5), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay))) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.3)) + + model.add(Conv2D(384, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(Conv2D(256, kernel_size=(3, 3), activation=activation_type, padding = 'same', + kernel_regularizer=regularizers.l2(weight_decay) )) + model.add(MaxPooling2D(pool_size=(2, 2), strides=(2,2) )) + model.add(Dropout(0.4)) + + model.add(Flatten()) + #model.add(Flatten()) + #model.add(Dense(256)) + model.add(Dense(10)) + model.add(Activation('softmax')) + + return model + + + +def buildModel(): + + model = Sequential() + model.add(Conv2D(128, kernel_size=(3, 3), activation='tanh', input_shape=(3, 32, 32), padding = 'same')) + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + #model.add(Dropout(0.25)) + + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Conv2D(256, kernel_size=(3, 3), activation='tanh', padding = 'same')) + model.add(MaxPooling2D(pool_size=(2, 2))) + #model.add(Dropout(0.25)) + + model.add(Flatten()) + #model.add(Flatten()) + model.add(Dense(4096, activation='tanh')) + #model.add(Dropout(0.5)) + model.add(Dense(2048, activation='tanh')) + model.add(Dense(10, activation='tanh')) + model.add(Activation('softmax')) + + return model + + + + +def trainModel(model): + + (X_train, Y_train), (X_test, Y_test) = cifar10.load_data() + test_labels = Y_test + train_labels = Y_train + + #X_train = X_train.astype('float32') + #X_test = X_test.astype('float32') + X_train = X_train / 255.0 + X_test = X_test / 255.0 + + mean = np.mean(X_train,axis=(0,1,2,3)) + std = np.std(X_train,axis=(0,1,2,3)) + X_train = (X_train-mean)/(std+1e-7) + X_test = (X_test-mean)/(std+1e-7) + + dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet_cifar10/" + + #opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) + # Compile the model + model.compile(loss='categorical_crossentropy', + optimizer=Adam(lr=0.0001, decay=1e-6), + #optimizer = opt_rms, + metrics=['accuracy']) + + #print 
to_categorical(Y_train, 10) + print (to_categorical(Y_train)) + + + datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + datagen.fit(X_train) + + + model.fit(X_train, to_categorical(Y_train, 10), + batch_size=128, + shuffle=True, + epochs = 1, + #epochs=100, + validation_data=(X_test, to_categorical(Y_test, 10)), callbacks=[LearningRateScheduler(lr_schedule)]) + + # Evaluate the model + scores = model.evaluate(X_test, to_categorical(Y_test, 10)) + + print('Loss: %.3f' % scores[0]) + print('Accuracy: %.3f' % scores[1]) + + print ("*** TRAINED MODEL ****\n") + + + #dumpCalibrationData("calibration_data/alexnet_calib.bin", X_train, + # "calibration_data/alexnet_train_labels.bin", train_labels) + + translate_to_approxhpvm(model, "data/alexnet_cifar10/", X_test, test_labels, 10) + + + +if __name__ == "__main__": + + os.environ["CUDA_VISIBLE_DEVICES"] = "0" + # Changing to NCHW format + K.set_image_data_format('channels_first') + + model = buildModel2() + trainModel(model) diff --git a/llvm/projects/keras/src/alexnet2.py b/llvm/projects/keras/src/alexnet2.py new file mode 100644 index 0000000000000000000000000000000000000000..812b212165666370092a3d55e8482643b550f830 --- /dev/null +++ b/llvm/projects/keras/src/alexnet2.py @@ -0,0 +1,243 @@ + +import keras +from keras.models import Sequential +from keras.utils import np_utils +from keras.preprocessing.image import ImageDataGenerator +from keras.layers import Dense, Activation, Flatten, Dropout, BatchNormalization +from keras.layers import Conv2D, MaxPooling2D +from keras.datasets import cifar10 +from keras import regularizers +from keras.callbacks import LearningRateScheduler +import numpy as np +import os +import struct +from keras import backend as K +from approxhpvm_translator import translate_to_approxhpvm + + + +def dumpWeights(file_name, weights, N, H, W, C): + # NOTE: Writing the NHWC weights array as NCHW + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + f.write(weights[i][k][l][j]) + + f.close() + + +def dumpConvWeights(file_name, weights, N, C, H, W): + + print (weights.shape) + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + f.write(weights[k][l][j][i]) + f.close() + + + +def dumpFcWeights(file_name, weights, H, W): + + print (weights.shape) + + f = open(file_name, "wb") + for i in range(H): + for j in range(W): + f.write(weights[i][j]) + f.close() + + +def dumpFcBias(file_name, bias, W): + + print (bias.shape) + + f = open(file_name, "wb") + for i in range(W): + f.write(bias[i]) + f.close() + + +def dumpLabels(file_name, Y_test): + + f = open(file_name, "wb") + + labels_map = {} + for label in Y_test: + label_val = np.int8(label[0]) + if label_val not in labels_map: + labels_map[label_val] = 0 + labels_map[label_val] += 1 + + f.write(label_val) + + f.close() + + + +def dumpData(X_test, file_name, N, C, H, W): + + print (X_test.shape) + + f = open(file_name, "wb") + for i in range(N): + for j in range(C): + for k in range(H): + for l in range(W): + val = struct.unpack("f", struct.pack("f", X_test[i][j][k][l])) + f.write(np.float32(val[0])) + + f.close() + + + + + +def lr_schedule(epoch): + lrate = 0.001 + if epoch > 75: + lrate = 0.0005 + if epoch > 100: + lrate = 0.0003 + return lrate + + +def lr_schedule2(epoch): + lrate = 0.0005 + if epoch > 100: + lrate = 0.0003 + if epoch > 200: + lrate = 0.0002 + if epoch > 250: + lrate = 
0.0001 + if epoch > 300: + lrate = 0.00003 + + return lrate + + +K.set_image_data_format('channels_first') + +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +test_labels = y_test +x_train = x_train.astype('float32') +x_test = x_test.astype('float32') + +#z-score +mean = np.mean(x_train,axis=(0,1,2,3)) +std = np.std(x_train,axis=(0,1,2,3)) +x_train = (x_train-mean)/(std+1e-7) +x_test = (x_test-mean)/(std+1e-7) + + +# Dumping test data and test labels +dir_prefix = "/home/hsharif3/Gitlab/hpvm/llvm/projects/hpvm-tensor-rt/model_params/alexnet2_cifar10/" + +dumpLabels(dir_prefix + "test_labels.bin", y_test) +dumpData(x_test, dir_prefix + "norm_cifar_input.bin", 10000, 3, 32, 32) + + + +num_classes = 10 +y_train = np_utils.to_categorical(y_train,num_classes) +y_test = np_utils.to_categorical(y_test,num_classes) + +weight_decay = 1e-4 +activation_type = 'tanh' + + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +model = Sequential() +model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay), input_shape=x_train.shape[1:])) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(32, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.2)) + +model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(64, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.3)) + +model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(Conv2D(128, (3,3), padding='same', kernel_regularizer=regularizers.l2(weight_decay))) +model.add(Activation(activation_type)) +#model.add(BatchNormalization()) +model.add(MaxPooling2D(pool_size=(2,2))) +model.add(Dropout(0.4)) + +model.add(Flatten()) +model.add(Dense(num_classes)) +model.add(Activation('softmax')) +model.summary() + +#data augmentation +datagen = ImageDataGenerator( + rotation_range=15, + width_shift_range=0.1, + height_shift_range=0.1, + horizontal_flip=True, + ) + +datagen.fit(x_train) + + +#training +batch_size = 64 + +opt_rms = keras.optimizers.rmsprop(lr=0.001,decay=1e-6) +model.compile(loss='categorical_crossentropy', optimizer=opt_rms, metrics=['accuracy']) +model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size),\ + steps_per_epoch=x_train.shape[0] // batch_size, #epochs=350,\ + epochs=1, + verbose=1,validation_data=(x_test,y_test),callbacks=[LearningRateScheduler(lr_schedule2)]) +#save to disk +model_json = model.to_json() +with open('model.json', 'w') as json_file: + json_file.write(model_json) + model.save_weights('model.h5') + +#testing +scores = model.evaluate(x_test, y_test, batch_size=128, verbose=1) +print('\nTest result: %.3f loss: %.3f' % (scores[1]*100,scores[0])) + + +translate_to_approxhpvm(model, "alexnet2_cifar10_test/", x_test, test_labels, "alexnet2_cifar10/", y_test) +sys.exit(0) + + + +params = model.get_weights() +dumpConvWeights(dir_prefix + "conv1.bin", params[0], 32, 3, 3, 3) +dumpFcBias(dir_prefix + "conv1_bias.bin", params[1], 32) +dumpConvWeights(dir_prefix + "conv2.bin", 
params[2], 32, 32, 3, 3) +dumpFcBias(dir_prefix + "conv2_bias.bin", params[3], 32) +dumpConvWeights(dir_prefix + "conv3.bin", params[4], 64, 32, 3, 3) +dumpFcBias(dir_prefix + "conv3_bias.bin", params[5], 64) +dumpConvWeights(dir_prefix + "conv4.bin", params[6], 64, 64, 3, 3) +dumpFcBias(dir_prefix + "conv4_bias.bin", params[7], 64) +dumpConvWeights(dir_prefix + "conv5.bin", params[8], 128, 64, 3, 3) +dumpFcBias(dir_prefix + "conv5_bias.bin", params[9], 128) +dumpConvWeights(dir_prefix + "conv6.bin", params[10], 128, 128, 3, 3) +dumpFcBias(dir_prefix + "conv6_bias.bin", params[11], 128) + +dumpFcWeights(dir_prefix + "fc1.bin", params[12], 2048, 10) +dumpFcBias(dir_prefix + "fc1_bias.bin", params[13], 10) + + diff --git a/llvm/projects/keras/src/lenet.py b/llvm/projects/keras/src/lenet.py new file mode 100644 index 0000000000000000000000000000000000000000..3fa8123bd156c68183db3e43d24670c23990247d --- /dev/null +++ b/llvm/projects/keras/src/lenet.py @@ -0,0 +1,92 @@ + +import sys +import keras +from keras.datasets import mnist +from keras.models import Sequential +from keras.layers import Dense, Dropout, Flatten, Activation +from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D +from keras import backend as K +from frontend.approxhpvm_translator import translate_to_approxhpvm + + +batch_size = 128 +num_classes = 10 +epochs = 5 + +# input image dimensions +img_rows, img_cols = 28, 28 + + + +if __name__ == "__main__": + + K.set_image_data_format('channels_first') + + # the data, split between train and test sets + (x_train, y_train), (x_test, y_test) = mnist.load_data() + test_labels = y_test + + + if K.image_data_format() == 'channels_first': + x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) + x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) + input_shape = (1, img_rows, img_cols) + else: + x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) + x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) + input_shape = (img_rows, img_cols, 1) + + + print(K.image_data_format()) + + x_train = x_train.astype('float32') + x_test = x_test.astype('float32') + x_train /= 255 + x_test /= 255 + print('x_train shape:', x_train.shape) + print(x_train.shape[0], 'train samples') + print(x_test.shape[0], 'test samples') + + # convert class vectors to binary class matrices + y_train = keras.utils.to_categorical(y_train, num_classes) + y_test = keras.utils.to_categorical(y_test, num_classes) + + + activation_type = 'relu' + + model = Sequential() + model.add(Conv2D(32, kernel_size=(5, 5), + activation=activation_type, + padding = 'same', + input_shape=input_shape)) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Conv2D(64, (5, 5), activation=activation_type, padding = 'same')) + model.add(ZeroPadding2D(padding = (1,1))) + model.add(Conv2D(64, (3, 3), strides = (2,2), activation=activation_type) ) + model.add(Flatten()) + model.add(Dense(1024, activation=activation_type)) + model.add(Dense(num_classes, activation=activation_type)) + model.add(Activation('softmax')) + + + + model.compile(loss=keras.losses.categorical_crossentropy, + optimizer=keras.optimizers.Adadelta(), + metrics=['accuracy']) + + model.fit(x_train, y_train, + batch_size=batch_size, + epochs=1, + verbose=1, + validation_data=(x_test, y_test)) + + + score = model.evaluate(x_test, y_test, verbose=0) + print('Test loss:', score[0]) + print('Test accuracy:', score[1]) + + + model.summary() + + translate_to_approxhpvm(model, "data/lenet_hpvm_batch/", x_test, test_labels, 10) + diff 
--git a/llvm/projects/keras/src/resnet.py b/llvm/projects/keras/src/resnet.py new file mode 100644 index 0000000000000000000000000000000000000000..6afa1c50fa470d038577ff8c3c4b5df43d9bab6b --- /dev/null +++ b/llvm/projects/keras/src/resnet.py @@ -0,0 +1,571 @@ +""" +#Trains a ResNet on the CIFAR10 dataset. + +ResNet v1: +[Deep Residual Learning for Image Recognition +](https://arxiv.org/pdf/1512.03385.pdf) + +ResNet v2: +[Identity Mappings in Deep Residual Networks +](https://arxiv.org/pdf/1603.05027.pdf) + + +Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti +:------------|--:|-------:|-----------------------:|---: +ResNet20 v1| 3| 92.16 %| 91.25 %|35 +ResNet32 v1| 5| 92.46 %| 92.49 %|50 +ResNet44 v1| 7| 92.50 %| 92.83 %|70 +ResNet56 v1| 9| 92.71 %| 93.03 %|90 +ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 +ResNet164 v1| 27| - %| 94.07 %| - +ResNet1001 v1|N/A| - %| 92.39 %| - + + + +Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti +:------------|--:|-------:|-----------------------:|---: +ResNet20 v2| 2| - %| - %|--- +ResNet32 v2|N/A| NA %| NA %| NA +ResNet44 v2|N/A| NA %| NA %| NA +ResNet56 v2| 6| 93.01 %| NA %|100 +ResNet110 v2| 12| 93.15 %| 93.63 %|180 +ResNet164 v2| 18| - %| 94.54 %| - +ResNet1001 v2|111| - %| 95.08+-.14 %| - +""" + +from __future__ import print_function +import keras +from keras.layers import Dense, Conv2D, BatchNormalization, Activation +from keras.layers import AveragePooling2D, Input, Flatten, ZeroPadding2D +from keras.optimizers import Adam +from keras.callbacks import ModelCheckpoint, LearningRateScheduler +from keras.callbacks import ReduceLROnPlateau +from keras.preprocessing.image import ImageDataGenerator +from keras.regularizers import l2 +from keras import backend as K +from keras.models import Model +from keras.datasets import cifar10 +from keras import backend as K +import numpy as np +import os +import sys +from approxhpvm_translator import translate_to_approxhpvm +from weight_utils import dumpCalibrationData + + + +os.environ["CUDA_VISIBLE_DEVICES"] = "0" + + +K.set_image_data_format('channels_first') + + +# Training parameters +batch_size = 32 # orig paper trained all networks with batch_size=128 +#---- epochs = 200 +epochs = 2 +data_augmentation = True +num_classes = 10 + +# Subtracting pixel mean improves accuracy +subtract_pixel_mean = True + +# Model parameter +# ---------------------------------------------------------------------------- +# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch +# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti +# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) +# ---------------------------------------------------------------------------- +# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) +# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) +# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) +# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) +# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) +# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) +# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) +# --------------------------------------------------------------------------- +n = 3 + +# Model version +# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) +version = 1 + +# Computed depth from supplied model parameter n +if version == 1: + depth = n * 6 + 2 +elif version == 2: + depth = n * 9 + 2 + +# Model name, depth and version 
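For the defaults used in this script (n = 3, version = 1), the depth formula above works out to a 20-layer ResNet. A quick check, illustration only:

```python
# Quick check of the depth formula above, using this script's defaults.
n, version = 3, 1
depth = n * 6 + 2 if version == 1 else n * 9 + 2
print(depth)                             # 20
print('ResNet%dv%d' % (depth, version))  # ResNet20v1 -- matches model_type below
```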
+model_type = 'ResNet%dv%d' % (depth, version) + +# Load the CIFAR10 data. +(x_train, y_train), (x_test, y_test) = cifar10.load_data() +test_labels = y_test +train_labels = y_train + +# Input image dimensions. +input_shape = x_train.shape[1:] + +# Normalize data. +x_train = x_train.astype('float32') / 255 +x_test = x_test.astype('float32') / 255 + +# If subtract pixel mean is enabled +if subtract_pixel_mean: + x_train_mean = np.mean(x_train, axis=0) + x_train -= x_train_mean + x_test -= x_train_mean + +print('x_train shape:', x_train.shape) +print(x_train.shape[0], 'train samples') +print(x_test.shape[0], 'test samples') +print('y_train shape:', y_train.shape) + +# Convert class vectors to binary class matrices. +y_train = keras.utils.to_categorical(y_train, num_classes) +y_test = keras.utils.to_categorical(y_test, num_classes) + + + + + + +def lr_schedule(epoch): + """Learning Rate Schedule + + Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs. + Called automatically every epoch as part of callbacks during training. + + # Arguments + epoch (int): The number of epochs + + # Returns + lr (float32): learning rate + """ + lr = 1e-3 + if epoch > 180: + lr *= 0.5e-3 + elif epoch > 160: + lr *= 1e-3 + elif epoch > 120: + lr *= 1e-2 + elif epoch > 80: + lr *= 1e-1 + print('Learning rate: ', lr) + return lr + + +def resnet_layer(inputs, + num_filters=16, + kernel_size=3, + strides=1, + activation='relu', + batch_normalization=True, + conv_first=True): + """2D Convolution-Batch Normalization-Activation stack builder + + # Arguments + inputs (tensor): input tensor from input image or previous layer + num_filters (int): Conv2D number of filters + kernel_size (int): Conv2D square kernel dimensions + strides (int): Conv2D square stride dimensions + activation (string): activation name + batch_normalization (bool): whether to include batch normalization + conv_first (bool): conv-bn-activation (True) or + bn-activation-conv (False) + + # Returns + x (tensor): tensor as input to the next layer + """ + conv = Conv2D(num_filters, + kernel_size=kernel_size, + strides=strides, + padding='valid', # NOTE: using valid convs with explicit pad operation + kernel_initializer='he_normal', + kernel_regularizer=l2(1e-4)) + + padding_value = int((kernel_size - 1) / 2) + zero_padding = ZeroPadding2D(padding = (padding_value, padding_value)) + + # FIXME: Temporarily disabled batch normalization + batch_normalization = False + + x = inputs + x = zero_padding(x) + if conv_first: + x = conv(x) + if batch_normalization: + x = BatchNormalization()(x) + if activation is not None: + x = Activation(activation)(x) + else: + if batch_normalization: + x = BatchNormalization()(x) + if activation is not None: + x = Activation(activation)(x) + x = conv(x) + return x + + + +def resnet_v0(input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] + + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filters is + doubled. Within each stage, the layers have the same number filters and the + same number of filters. 
+ Features maps sizes: + stage 0: 32x32, 16 + stage 1: 16x16, 32 + stage 2: 8x8, 64 + The Number of parameters is approx the same as Table 6 of [a]: + ResNet20 0.27M + ResNet32 0.46M + ResNet44 0.66M + ResNet56 0.85M + ResNet110 1.7M + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 6 != 0: + raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') + # Start model definition. + num_filters = 16 + num_res_blocks = int((depth - 2) / 6) + + inputs = Input(shape=input_shape) + x = resnet_layer(inputs=inputs) + # Instantiate the stack of residual units + for stack in range(3): + for res_block in range(num_res_blocks): + strides = 1 + if stack > 0 and res_block == 0: # first layer but not first stack + strides = 2 # downsample + y = resnet_layer(inputs=x, + num_filters=num_filters, + strides=strides) + y = resnet_layer(inputs=y, + num_filters=num_filters, + activation=None) + if stack > 0 and res_block == 0: # first layer but not first stack + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + x = Activation('relu')(x) + num_filters *= 1 + + # Add classifier on top. + # v1 does not use BN after last shortcut connection-ReLU + #-- x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + x = Dense(64)(y) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(x) + + # Instantiate model. + model = Model(inputs=inputs, outputs=outputs) + return model + + +def resnet_v1_1(input_shape, depth, num_classes=10): + """ResNet Version 1 Model builder [a] + + Stacks of 2 x (3 x 3) Conv2D-BN-ReLU + Last ReLU is after the shortcut connection. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filters is + doubled. Within each stage, the layers have the same number filters and the + same number of filters. + Features maps sizes: + stage 0: 32x32, 16 + stage 1: 16x16, 32 + stage 2: 8x8, 64 + The Number of parameters is approx the same as Table 6 of [a]: + ResNet20 0.27M + ResNet32 0.46M + ResNet44 0.66M + ResNet56 0.85M + ResNet110 1.7M + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 6 != 0: + raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])') + # Start model definition. 
+ num_filters = 16 + num_res_blocks = int((depth - 2) / 6) + + inputs = Input(shape=input_shape) + x = resnet_layer(inputs=inputs) + # Instantiate the stack of residual units + for stack in range(3): + for res_block in range(num_res_blocks): + strides = 1 + if stack > 0 and res_block == 0: # first layer but not first stack + strides = 2 # downsample + y = resnet_layer(inputs=x, + num_filters=num_filters, + strides=strides) + y = resnet_layer(inputs=y, + num_filters=num_filters, + activation=None) + if stack > 0 and res_block == 0: # first layer but not first stack + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + x = Activation('relu')(x) + num_filters *= 2 + + + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + #activation='softmax', + kernel_initializer='he_normal')(y) + + outputs = Activation('softmax')(outputs) + + + # Instantiate model. + model = Model(inputs=inputs, outputs=outputs) + return model + + + +def resnet_v2(input_shape, depth, num_classes=10): + """ResNet Version 2 Model builder [b] + + Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as + bottleneck layer + First shortcut connection per layer is 1 x 1 Conv2D. + Second and onwards shortcut connection is identity. + At the beginning of each stage, the feature map size is halved (downsampled) + by a convolutional layer with strides=2, while the number of filter maps is + doubled. Within each stage, the layers have the same number filters and the + same filter map sizes. + Features maps sizes: + conv1 : 32x32, 16 + stage 0: 32x32, 64 + stage 1: 16x16, 128 + stage 2: 8x8, 256 + + # Arguments + input_shape (tensor): shape of input image tensor + depth (int): number of core convolutional layers + num_classes (int): number of classes (CIFAR10 has 10) + + # Returns + model (Model): Keras model instance + """ + if (depth - 2) % 9 != 0: + raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') + # Start model definition. 
+ num_filters_in = 16 + num_res_blocks = int((depth - 2) / 9) + + inputs = Input(shape=input_shape) + # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths + x = resnet_layer(inputs=inputs, + num_filters=num_filters_in, + conv_first=True) + + # Instantiate the stack of residual units + for stage in range(3): + for res_block in range(num_res_blocks): + activation = 'relu' + batch_normalization = True + strides = 1 + if stage == 0: + num_filters_out = num_filters_in * 4 + if res_block == 0: # first layer and first stage + activation = None + batch_normalization = False + else: + num_filters_out = num_filters_in * 2 + if res_block == 0: # first layer but not first stage + strides = 2 # downsample + + # bottleneck residual unit + y = resnet_layer(inputs=x, + num_filters=num_filters_in, + kernel_size=1, + strides=strides, + activation=activation, + batch_normalization=batch_normalization, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_in, + conv_first=False) + y = resnet_layer(inputs=y, + num_filters=num_filters_out, + kernel_size=1, + conv_first=False) + if res_block == 0: + # linear projection residual shortcut connection to match + # changed dims + x = resnet_layer(inputs=x, + num_filters=num_filters_out, + kernel_size=1, + strides=strides, + activation=None, + batch_normalization=False) + x = keras.layers.add([x, y]) + + num_filters_in = num_filters_out + + # Add classifier on top. + # v2 has BN-ReLU before Pooling + x = BatchNormalization()(x) + x = Activation('relu')(x) + x = AveragePooling2D(pool_size=8)(x) + y = Flatten()(x) + outputs = Dense(num_classes, + activation='softmax', + kernel_initializer='he_normal')(y) + + # Instantiate model. + model = Model(inputs=inputs, outputs=outputs) + return model + +depth = 20 + +if version == 2: + model = resnet_v2(input_shape=input_shape, depth=depth) +else: + model = resnet_v1_1(input_shape=input_shape, depth=depth) + + +model.compile(loss='categorical_crossentropy', + optimizer=Adam(lr=lr_schedule(0)), + metrics=['accuracy']) +model.summary() +print(model_type) + +# Prepare model model saving directory. +save_dir = os.path.join(os.getcwd(), 'saved_models') +model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type +if not os.path.isdir(save_dir): + os.makedirs(save_dir) +filepath = os.path.join(save_dir, model_name) + +# Prepare callbacks for model saving and for learning rate adjustment. +checkpoint = ModelCheckpoint(filepath=filepath, + monitor='val_acc', + verbose=1, + save_best_only=True) + +lr_scheduler = LearningRateScheduler(lr_schedule) + +lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), + cooldown=0, + patience=5, + min_lr=0.5e-6) + +callbacks = [checkpoint, lr_reducer, lr_scheduler] + +# Run training, with or without data augmentation. 
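Before the training block below, a quick sanity check of the step decay that lr_schedule (defined earlier in this script) supplies to the LearningRateScheduler callback; this sketch just re-states its thresholds:

```python
# Re-statement of the step decay in lr_schedule above, evaluated at a few sample epochs.
def lr_schedule(epoch):
    lr = 1e-3
    if epoch > 180:
        lr *= 0.5e-3
    elif epoch > 160:
        lr *= 1e-3
    elif epoch > 120:
        lr *= 1e-2
    elif epoch > 80:
        lr *= 1e-1
    return lr

for epoch in (0, 81, 121, 161, 181):
    print(epoch, lr_schedule(epoch))
# 0 0.001 | 81 0.0001 | 121 1e-05 | 161 1e-06 | 181 5e-07
```

With epochs = 2 as set near the top of this script, only the initial 1e-3 rate is actually reached during training.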
+if not data_augmentation: + print('Not using data augmentation.') + model.fit(x_train, y_train, + batch_size=batch_size, + epochs=epochs, + validation_data=(x_test, y_test), + shuffle=True, + callbacks=callbacks) +else: + print('Using real-time data augmentation.') + # This will do preprocessing and realtime data augmentation: + datagen = ImageDataGenerator( + # set input mean to 0 over the dataset + featurewise_center=False, + # set each sample mean to 0 + samplewise_center=False, + # divide inputs by std of dataset + featurewise_std_normalization=False, + # divide each input by its std + samplewise_std_normalization=False, + # apply ZCA whitening + zca_whitening=False, + # epsilon for ZCA whitening + zca_epsilon=1e-06, + # randomly rotate images in the range (deg 0 to 180) + rotation_range=0, + # randomly shift images horizontally + width_shift_range=0.1, + # randomly shift images vertically + height_shift_range=0.1, + # set range for random shear + shear_range=0., + # set range for random zoom + zoom_range=0., + # set range for random channel shifts + channel_shift_range=0., + # set mode for filling points outside the input boundaries + fill_mode='nearest', + # value used for fill_mode = "constant" + cval=0., + # randomly flip images + horizontal_flip=True, + # randomly flip images + vertical_flip=False, + # set rescaling factor (applied before any other transformation) + rescale=None, + # set function that will be applied on each input + preprocessing_function=None, + # image data format, either "channels_first" or "channels_last" + data_format="channels_first", + # fraction of images reserved for validation (strictly between 0 and 1) + validation_split=0.0) + + # Compute quantities required for featurewise normalization + # (std, mean, and principal components if ZCA whitening is applied). + datagen.fit(x_train) + + # Fit the model on the batches generated by datagen.flow(). + model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), + validation_data=(x_test, y_test), + epochs=epochs, verbose=1, workers=4, + callbacks=callbacks) + +# Score trained model. 
+scores = model.evaluate(x_test, y_test, verbose=1) +print('Test loss:', scores[0]) +print('Test accuracy:', scores[1]) + + +dumpCalibrationData("calibration_data/resnet18_calib.bin", x_train, + "calibration_data/resnet18_train_labels.bin", train_labels) +sys.exit(0) + +#translate_to_approxhpvm(model, "resnet18_cifar10_hpvm/", x_test, test_labels) + +translate_to_approxhpvm(model, "resnet_test/", x_test, test_labels, 'resnet18_cifar10_promise/', y_test) diff --git a/llvm/projects/keras/src/vgg16_cifar10.py b/llvm/projects/keras/src/vgg16_cifar10.py new file mode 100644 index 0000000000000000000000000000000000000000..64e5d36e78fd728381dc864bf965ff68c4e7cf16 --- /dev/null +++ b/llvm/projects/keras/src/vgg16_cifar10.py @@ -0,0 +1,241 @@ + + +from __future__ import print_function +import keras +from keras.datasets import cifar10 +from keras.preprocessing.image import ImageDataGenerator +from keras.models import Sequential +from keras.layers import Dense, Dropout, Activation, Flatten +from keras.layers import Conv2D, MaxPooling2D, BatchNormalization +from keras import optimizers +import numpy as np +from keras.layers.core import Lambda +from keras import backend as K +from keras import regularizers +import os +import sys +from frontend.approxhpvm_translator import translate_to_approxhpvm +from frontend.weight_utils import dumpCalibrationData + + + +class cifar10vgg: + def __init__(self,train=True): + self.num_classes = 10 + self.weight_decay = 0.0005 + self.x_shape = [3,32,32] + + self.model = self.build_model() + if train: + self.model = self.train(self.model) + else: + self.model.load_weights('cifar10vgg.h5') + + + def build_model(self): + # Build the network of vgg for 10 classes with massive dropout and weight decay as described in the paper. + + model = Sequential() + weight_decay = self.weight_decay + + model.add(Conv2D(64, (3, 3), padding='same', + input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.3)) + + model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + #model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), 
padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(Dropout(0.4)) + + model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + model.add(MaxPooling2D(pool_size=(2, 2))) + model.add(Dropout(0.5)) + + model.add(Flatten()) + model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay))) + model.add(Activation('relu')) + # model.add(BatchNormalization()) + + model.add(Dropout(0.5)) + model.add(Dense(self.num_classes)) + model.add(Activation('softmax')) + return model + + + def normalize(self,X_train,X_test): + #this function normalize inputs for zero mean and unit variance + # it is used when training a model. + # Input: training set and test set + # Output: normalized training set and test set according to the trianing set statistics. + mean = np.mean(X_train,axis=(0,1,2,3)) + std = np.std(X_train, axis=(0, 1, 2, 3)) + X_train = (X_train-mean)/(std+1e-7) + X_test = (X_test-mean)/(std+1e-7) + return X_train, X_test + + + def normalize_production(self,x): + #this function is used to normalize instances in production according to saved training set statistics + # Input: X - a training set + # Output X - a normalized training set according to normalization constants. 
+
+        # These values were produced during the first training run; they are the fixed
+        # normalization statistics for the standard CIFAR-10 training set.
+        mean = 120.707
+        std = 64.15
+        return (x-mean)/(std+1e-7)
+
+
+    def predict(self,x,normalize=True,batch_size=50):
+        if normalize:
+            x = self.normalize_production(x)
+        return self.model.predict(x, batch_size=batch_size)
+
+
+    def train(self,model):
+
+        #training parameters
+        batch_size = 128
+        #maxepoches = 250
+        maxepoches = 30
+        learning_rate = 0.01
+        lr_decay = 1e-6
+        lr_drop = 20
+        # The data, shuffled and split between train and test sets:
+        (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+        x_train = x_train.astype('float32')
+        x_test = x_test.astype('float32')
+        x_train, x_test = self.normalize(x_train, x_test)
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+        def lr_scheduler(epoch):
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+        #data augmentation
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by std of the dataset
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
+            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False)  # do not flip images vertically
+        # (std, mean, and principal components if ZCA whitening is applied).
+        datagen.fit(x_train)
+
+
+
+        #optimization details
+        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
+
+
+        # Training process: fit with data augmentation; the learning rate is halved every 20 epochs (lr_drop).
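+        # Quick check of lr_scheduler above (learning_rate = 0.01, lr_drop = 20): it returns
+        # 0.01 for epochs 0-19, 0.005 for epochs 20-39, 0.0025 for epochs 40-59, and so on;
+        # with maxepoches = 30 only the first halving is ever reached.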
+
+        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
+                                                        batch_size=batch_size),
+                                          steps_per_epoch=x_train.shape[0] // batch_size,
+                                          epochs=maxepoches,
+                                          validation_data=(x_test, y_test),
+                                          callbacks=[reduce_lr], verbose=2)
+
+        model.save_weights('cifar10vgg.h5')
+        return model
+
+
+if __name__ == '__main__':
+
+    K.set_image_data_format('channels_first')
+
+    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+
+    (x_train, y_train), (x_test, y_test) = cifar10.load_data()
+    test_labels = y_test
+    train_labels = y_train
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+
+    y_train = keras.utils.to_categorical(y_train, 10)
+    y_test = keras.utils.to_categorical(y_test, 10)
+
+    model = cifar10vgg()
+
+    predicted_x = model.predict(x_test)
+
+    norm_test = model.normalize_production(x_test)
+
+    # Normalizing train data before dumping
+    #x_train, x_test = model.normalize(x_train, x_test)
+    x_train = model.normalize_production(x_train)
+
+    # dumpCalibrationData("vgg16_cifar_calib.bin", x_train, "vgg16_train_labels.bin", train_labels)
+
+    translate_to_approxhpvm(model.model, "data/vgg16_cifar10/", norm_test, test_labels, 10)
+
+    residuals = np.argmax(predicted_x,1) != np.argmax(y_test,1)
+
+    loss = sum(residuals)/len(residuals)
+    print("the validation 0/1 loss is: ", loss)
+
+
diff --git a/llvm/projects/keras/src/vgg16_cifar100.py b/llvm/projects/keras/src/vgg16_cifar100.py
new file mode 100644
index 0000000000000000000000000000000000000000..66fe6be669f984b92c8c602332c29e09968e9c8a
--- /dev/null
+++ b/llvm/projects/keras/src/vgg16_cifar100.py
@@ -0,0 +1,243 @@
+
+from __future__ import print_function
+import os
+import keras
+from keras.datasets import cifar100
+from keras.preprocessing.image import ImageDataGenerator
+from keras.models import Sequential
+from keras.layers import Dense, Dropout, Activation, Flatten
+from keras.layers import Conv2D, MaxPooling2D
+from keras import optimizers
+import numpy as np
+from keras.layers.core import Lambda
+from keras import backend as K
+from keras import regularizers
+# NOTE: import via the 'frontend' package, matching vgg16_cifar10.py.
+from frontend.approxhpvm_translator import translate_to_approxhpvm
+import sys
+from frontend.weight_utils import dumpCalibrationData
+
+
+
+class cifar100vgg:
+    def __init__(self,train=True):
+        self.num_classes = 100
+        self.weight_decay = 0.0005
+        self.x_shape = [3,32,32]
+
+        self.model = self.build_model()
+        if train:
+            self.model = self.train(self.model)
+        else:
+            self.model.load_weights('cifar100vgg.h5')
+
+
+    def build_model(self):
+        # Build the VGG network for 100 classes, with massive dropout and weight decay as described in the paper.
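+        # The layer stack below mirrors vgg16_cifar10.py: 13 conv layers
+        # (2x64, 2x128, 3x256, 3x512, 3x512) with max-pooling between blocks,
+        # followed by a 512-unit dense layer and a 100-way softmax classifier.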
+
+        model = Sequential()
+        weight_decay = self.weight_decay
+
+        model.add(Conv2D(64, (3, 3), padding='same',
+                         input_shape=self.x_shape,kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.3))
+
+        model.add(Conv2D(64, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(128, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(256, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+        model.add(Dropout(0.4))
+
+        model.add(Conv2D(512, (3, 3), padding='same',kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+
+        model.add(MaxPooling2D(pool_size=(2, 2)))
+        model.add(Dropout(0.5))
+
+        model.add(Flatten())
+        model.add(Dense(512,kernel_regularizer=regularizers.l2(weight_decay)))
+        model.add(Activation('relu'))
+        #model.add(BatchNormalization())
+
+        model.add(Dropout(0.5))
+        model.add(Dense(self.num_classes))
+        model.add(Activation('softmax'))
+        return model
+
+
+    def normalize(self,X_train,X_test):
+        # This function normalizes inputs to zero mean and unit variance.
+        # It is used when training a model.
+        # Input: training set and test set
+        # Output: normalized training set and test set according to the training set statistics.
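+        # Note: mean and std below are taken over all axes (samples, channels, rows, cols),
+        # so each is a single scalar; for the CIFAR-100 training set these come out to
+        # roughly 121.9 and 68.4, the constants hard-coded in normalize_production below.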
+        mean = np.mean(X_train,axis=(0,1,2,3))
+        std = np.std(X_train, axis=(0, 1, 2, 3))
+        print(mean)
+        print(std)
+        X_train = (X_train-mean)/(std+1e-7)
+        X_test = (X_test-mean)/(std+1e-7)
+        return X_train, X_test
+
+    def normalize_production(self,x):
+        # This function normalizes instances in production according to saved training set statistics.
+        # Input: x - instances to be normalized
+        # Output: x normalized with the training-set normalization constants.
+
+        # These values were produced during the first training run; they are the fixed
+        # normalization statistics for the standard CIFAR-100 training set.
+        mean = 121.936
+        std = 68.389
+        return (x-mean)/(std+1e-7)
+
+    def predict(self,x,normalize=True,batch_size=50):
+        if normalize:
+            x = self.normalize_production(x)
+        return self.model.predict(x, batch_size=batch_size)
+
+    def train(self,model):
+
+        #training parameters
+        batch_size = 128
+        #maxepoches = 250
+        #maxepoches = 400
+        maxepoches = 4
+        learning_rate = 0.05
+        lr_decay = 1e-6
+        lr_drop = 20
+
+        # The data, shuffled and split between train and test sets:
+        (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+        x_train = x_train.astype('float32')
+        x_test = x_test.astype('float32')
+        x_train, x_test = self.normalize(x_train, x_test)
+
+        y_train = keras.utils.to_categorical(y_train, self.num_classes)
+        y_test = keras.utils.to_categorical(y_test, self.num_classes)
+
+
+        def lr_scheduler(epoch):
+            return learning_rate * (0.5 ** (epoch // lr_drop))
+        reduce_lr = keras.callbacks.LearningRateScheduler(lr_scheduler)
+
+
+        #data augmentation
+        datagen = ImageDataGenerator(
+            featurewise_center=False,  # set input mean to 0 over the dataset
+            samplewise_center=False,  # set each sample mean to 0
+            featurewise_std_normalization=False,  # divide inputs by std of the dataset
+            samplewise_std_normalization=False,  # divide each input by its std
+            zca_whitening=False,  # apply ZCA whitening
+            rotation_range=15,  # randomly rotate images in the range (degrees, 0 to 180)
+            width_shift_range=0.1,  # randomly shift images horizontally (fraction of total width)
+            height_shift_range=0.1,  # randomly shift images vertically (fraction of total height)
+            horizontal_flip=True,  # randomly flip images horizontally
+            vertical_flip=False)  # do not flip images vertically
+        # (std, mean, and principal components if ZCA whitening is applied).
+        datagen.fit(x_train)
+
+
+
+        #optimization details
+        sgd = optimizers.SGD(lr=learning_rate, decay=lr_decay, momentum=0.9, nesterov=True)
+        model.compile(loss='categorical_crossentropy', optimizer=sgd,metrics=['accuracy'])
+
+
+        # Training process: fit with data augmentation; the learning rate is halved every 20 epochs (lr_drop).
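+        # Quick check of the schedule (learning_rate = 0.05, lr_drop = 20): epochs 0-19 use 0.05;
+        # with maxepoches = 4 the learning rate therefore never drops during this short run.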
+
+        historytemp = model.fit_generator(datagen.flow(x_train, y_train,
+                                                        batch_size=batch_size),
+                                          steps_per_epoch=x_train.shape[0] // batch_size,
+                                          epochs=maxepoches,
+                                          validation_data=(x_test, y_test),
+                                          callbacks=[reduce_lr], verbose=2)
+        model.save_weights('cifar100vgg.h5')
+        return model
+
+if __name__ == '__main__':
+
+    K.set_image_data_format('channels_first')
+    os.environ["CUDA_VISIBLE_DEVICES"] = "1"
+
+    (x_train, y_train), (x_test, y_test) = cifar100.load_data()
+    test_labels = y_test
+    train_labels = y_train
+
+    x_train = x_train.astype('float32')
+    x_test = x_test.astype('float32')
+
+    y_train = keras.utils.to_categorical(y_train, 100)
+    y_test = keras.utils.to_categorical(y_test, 100)
+
+    model = cifar100vgg()
+
+    predicted_x = model.predict(x_test)
+
+    norm_test = model.normalize_production(x_test)
+
+    # Normalize the training data before dumping calibration data
+    x_train = model.normalize_production(x_train)
+
+    dumpCalibrationData("calibration_data/vgg16_cifar100_calib.bin", x_train,
+                        "calibration_data/vgg16_cifar100_train_labels.bin", train_labels)
+    # NOTE: the script currently exits here after dumping calibration data;
+    # remove this exit to run the translator and the accuracy check below.
+    sys.exit(0)
+
+
+    translate_to_approxhpvm(model.model, "vgg16_cifar100_test/", norm_test, test_labels,
+                            "vgg16_cifar100_front", y_test)
+
+
+    residuals = (np.argmax(predicted_x,1) != np.argmax(y_test,1))
+    loss = sum(residuals)/len(residuals)
+    print("the validation 0/1 loss is: ", loss)
+
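+    # Example (hypothetical usage): once 'cifar100vgg.h5' has been saved by a training run,
+    # the trained weights can be reloaded without retraining by constructing the wrapper
+    # with train=False, e.g.:
+    #
+    #   model = cifar100vgg(train=False)
+    #   predicted_x = model.predict(x_test)
+    #
+    # The same pattern applies to cifar10vgg in vgg16_cifar10.py.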