From 392651c5ab6aeb20bb1b12926c31e397e3a93da0 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Tue, 17 Sep 2019 01:39:19 -0500
Subject: [PATCH] Adding better diagnostic messages and knob for smart Quant

---
 .../keras/frontend/approxhpvm_translator.py   | 33 +++++--
 .../projects/keras/frontend/quantize_utils.py | 99 +++++++++++++++++++
 2 files changed, 122 insertions(+), 10 deletions(-)
 create mode 100644 llvm/projects/keras/frontend/quantize_utils.py

diff --git a/llvm/projects/keras/frontend/approxhpvm_translator.py b/llvm/projects/keras/frontend/approxhpvm_translator.py
index 87f53ca1be..21e109d9ed 100644
--- a/llvm/projects/keras/frontend/approxhpvm_translator.py
+++ b/llvm/projects/keras/frontend/approxhpvm_translator.py
@@ -172,7 +172,7 @@ class DFGNode:
       print("\t strides = ", self.strides)


-    if nodeHasActivation(self):
+    if layerHasActivationAttr(self):
       self.activation_type = layer.activation.__name__
       print ("\t Activation = ", self.activation_type)

@@ -397,7 +397,14 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "Dense":
+      if strides[0] > 1 and cur_node.padding.strip() == "same":
+        print ("!ERROR: Same Padding not supported for Conv with Stride > 1")
+        print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n");
+        sys.exit(0)
+
+
+
+    elif layer_type == "Dense":
       input_var_name = self.getSingleInputName(cur_node)

       weights = cur_node.weights
@@ -409,7 +416,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if self.hasBiasAdd(cur_node):
+    elif self.hasBiasAdd(cur_node):
       out_var_name2 = self.getVariableName(cur_node)

       inst_str = "void* " + out_var_name2 + " = "
@@ -423,23 +430,26 @@ class TensorRtTranslator:

       out_var_name1 = out_var_name2

-    if layer_type == "Activation":
+    elif layer_type == "Activation":
       input_var_name = self.getSingleInputName(cur_node)

       inst_str = genActivationCallStr(input_var_name, out_var_name1, cur_node.activation_type)
       self.program_str += inst_str

-    if self.hasActivation(cur_node) and layer_type != "Activation":
+    elif self.hasActivation(cur_node) and layer_type != "Activation":
       activation_type = cur_node.activation_type
       out_var_name3 = self.getVariableName(cur_node)

       inst_str = genActivationCallStr(out_var_name1, out_var_name3, activation_type)
       self.program_str += inst_str
+      if activation_type == "softmax":
+        print ("Softmax canNOT be part of Dense/Conv Op. Insert: Activation('softmax');")
+        sys.exit(0)


-    if layer_type == "BatchNormalization":
+    elif layer_type == "BatchNormalization":
       input_var_name = self.getSingleInputName(cur_node)

       inst_str = "void* " + out_var_name1 + " = "

@@ -454,7 +464,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "Add":
+    elif layer_type == "Add":
       input_vars = self.getMultipleInputNames(cur_node)

       inst_str = "void* " + out_var_name1 + " = "
@@ -462,7 +472,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
+    elif layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
       input_var_name = self.getSingleInputName(cur_node)

       pool_size = cur_node.pool_size
@@ -481,8 +491,12 @@ class TensorRtTranslator:
         inst_str += "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1])
       inst_str += "); \n"
       self.program_str += inst_str
+      #else:
+      #  print ("ERROR: Operator = ", layer_type, " is NOT currently supported")

-
+
+
+



@@ -622,7 +636,6 @@ class TensorRtTranslator:

     if layer_type == "BatchNormalization":
       weights = layer.get_weights()
-      print ("\n\n ************ len(weights = ", len(weights), "\n\n")

       gamma_w = weights[0]
       gamma_id = layer_name + "_gamma"
diff --git a/llvm/projects/keras/frontend/quantize_utils.py b/llvm/projects/keras/frontend/quantize_utils.py
new file mode 100644
index 0000000000..998bd31ace
--- /dev/null
+++ b/llvm/projects/keras/frontend/quantize_utils.py
@@ -0,0 +1,99 @@
+
+
+from scipy import stats
+from numpy import linalg
+import numpy as np
+import sys
+
+
+# NOTE: enable/disable smart quantization of weights and activations
+smart_quantization = False
+
+
+def quantize_arr(input_arr, min_val, max_val):
+
+  quantize_range = 256.0
+  input_range = max_val - min_val
+  mul_factor = input_range / quantize_range
+
+  v1 = np.subtract(input_arr, min_val)
+  v2 = np.divide(v1, mul_factor)
+  v3 = v2.astype(int)
+  v4 = np.multiply(v3, mul_factor)
+  v5 = np.add(v4, min_val)
+  v6 = np.clip(v5, min_val, max_val)
+
+  return v6
+
+
+def compute_norm(a1, a2):
+
+  norm_inp = np.subtract(a1, a2)
+  #norm = linalg.norm(norm_inp, ord = 1)
+  norm = np.sum(np.abs(norm_inp))
+  print ("*** norm = ", norm)
+
+  return norm
+
+
+def get_best_quant_range(input_arr):
+
+  # For disabled smart quantization, skip expensive quant range computation
+  if smart_quantization == False:
+    min_val = np.percentile(input_arr, 0.1)
+    max_val = np.percentile(input_arr, 99.9)
+    return (min_val, max_val)
+
+
+  # Trying different threshold values for INT8 quantization
+  min_percentiles = [0, 0.1, 0.2, 0.3]
+  max_percentiles = [100, 99.9, 99.8, 99.7]
+
+
+  min_norm = 100000000
+  min_pair = (0, 100)
+  range_vals = (0, 0)
+  for i in min_percentiles:
+    for j in max_percentiles:
+      print (" i = ", i, " j = ", j, " \n")
+      min_val = np.percentile(input_arr, i)
+      max_val = np.percentile(input_arr, j)
+
+      res = quantize_arr(input_arr, min_val, max_val)
+      norm = compute_norm(res, input_arr)
+
+      if norm < min_norm:
+        min_norm = norm
+        min_pair = (i, j)
+        range_vals = (min_val, max_val)
+
+  print ("--- min_norm = ", min_norm, " , min_pair = ", min_pair , " range_vals = ", range_vals)
+
+
+  return range_vals
+
+
+
+
+
+
+if __name__ == "__main__":
+
+
+  vals = np.zeros((2,3))
+  vals[0][0] = 1.2
+  vals[0][1] = 0.48
+  vals[0][2] = 0.5
+  vals[1][0] = -0.3
+  vals[1][1] = 0.25
+  vals[1][2] = 0.46
+
+  input_arr = np.array(vals)
+
+  res = quantize_arr(input_arr, -0.3, 1.4)
+
+  print (res, "\n")
+
+  divergence = compute_norm(res, input_arr)
+
+  print ("divergence = ", divergence, "\n")

--
GitLab
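
Reviewer note (not part of the patch): quantize_utils.py exposes the smart
quantization knob as a module-level flag, so a caller flips it on the module
before asking for a range. A minimal sketch of how the helpers might be
driven, assuming the module is importable as quantize_utils and using a
randomly generated stand-in for a layer's weight tensor:

    import numpy as np
    import quantize_utils

    # Assumed example tensor standing in for a Conv layer's weights.
    weights = np.random.randn(64, 3, 3, 3).astype(np.float32)

    # Knob off (default): fixed 0.1 / 99.9 percentile clipping range.
    lo, hi = quantize_utils.get_best_quant_range(weights)

    # Knob on: sweep the (min, max) percentile pairs and keep the range with
    # the smallest L1 error between original and fake-quantized values.
    quantize_utils.smart_quantization = True
    lo_s, hi_s = quantize_utils.get_best_quant_range(weights)

    # Fake-quantize to 256 levels over the chosen range.
    quantized = quantize_utils.quantize_arr(weights, lo_s, hi_s)
    print("L1 error:", quantize_utils.compute_norm(quantized, weights))

The search is brute force (4 x 4 percentile pairs, each needing a full
quantize-and-compare pass over the tensor), which is presumably why the flag
defaults to False and the cheap fixed percentile range is used unless smart
quantization is explicitly enabled.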
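Reviewer note (not part of the patch): the two new diagnostics ask users to
restructure the Keras model rather than letting the translator miscompile it.
A sketch of what the suggested fixes look like on the model side; the layer
sizes, the input shape, and the padding of (1, 1) are assumptions for
illustration (the padding value the translator prints depends on the layer's
kernel size):

    from keras.models import Sequential
    from keras.layers import Conv2D, ZeroPadding2D, Flatten, Dense, Activation

    model = Sequential()

    # Instead of Conv2D(..., strides=(2, 2), padding='same'), pad explicitly
    # with ZeroPadding2D and use padding='valid', as the new Conv diagnostic
    # suggests ((1, 1) here assumes a 3x3 kernel).
    model.add(ZeroPadding2D(padding=(1, 1), input_shape=(32, 32, 3)))
    model.add(Conv2D(32, (3, 3), strides=(2, 2), padding='valid', activation='relu'))

    model.add(Flatten())

    # Instead of Dense(10, activation='softmax'), keep softmax as a standalone
    # layer, as the new softmax diagnostic requires.
    model.add(Dense(10))
    model.add(Activation('softmax'))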