From 392651c5ab6aeb20bb1b12926c31e397e3a93da0 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@tyler.cs.illinois.edu>
Date: Tue, 17 Sep 2019 01:39:19 -0500
Subject: [PATCH] Adding better diagnostic messages and knob for smart Quant

---
 .../keras/frontend/approxhpvm_translator.py   | 33 +++++--
 .../projects/keras/frontend/quantize_utils.py | 99 +++++++++++++++++++
 2 files changed, 122 insertions(+), 10 deletions(-)
 create mode 100644 llvm/projects/keras/frontend/quantize_utils.py

diff --git a/llvm/projects/keras/frontend/approxhpvm_translator.py b/llvm/projects/keras/frontend/approxhpvm_translator.py
index 87f53ca1be..21e109d9ed 100644
--- a/llvm/projects/keras/frontend/approxhpvm_translator.py
+++ b/llvm/projects/keras/frontend/approxhpvm_translator.py
@@ -172,7 +172,7 @@ class DFGNode:
       print("\t strides = ", self.strides)


-    if nodeHasActivation(self):
+    if layerHasActivationAttr(self):
       self.activation_type = layer.activation.__name__
       print ("\t Activation = ", self.activation_type)

@@ -397,7 +397,14 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "Dense":
+      if strides[0] > 1 and cur_node.padding.strip() == "same":
+        print ("!ERROR: Same Padding not supported for Conv with Stride > 1")
+        print ("Use: ZeroPadding2D(padding=(" + str(padding) + "," + str(padding) + "));\n");
+        sys.exit(0)
+
+
+
+    elif layer_type == "Dense":
       input_var_name = self.getSingleInputName(cur_node)

       weights = cur_node.weights
@@ -409,7 +416,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if self.hasBiasAdd(cur_node):
+    elif self.hasBiasAdd(cur_node):
       out_var_name2 = self.getVariableName(cur_node)

       inst_str = "void* " + out_var_name2 + " = "
@@ -423,23 +430,26 @@ class TensorRtTranslator:

       out_var_name1 = out_var_name2

-    if layer_type == "Activation":
+    elif layer_type == "Activation":
       input_var_name = self.getSingleInputName(cur_node)

       inst_str = genActivationCallStr(input_var_name, out_var_name1, cur_node.activation_type)
       self.program_str += inst_str

-    if self.hasActivation(cur_node) and layer_type != "Activation":
+    elif self.hasActivation(cur_node) and layer_type != "Activation":
       activation_type = cur_node.activation_type
       out_var_name3 = self.getVariableName(cur_node)

       inst_str = genActivationCallStr(out_var_name1, out_var_name3, activation_type)
       self.program_str += inst_str
+      if activation_type == "softmax":
+        print ("Softmax canNOT be part of Dense/Conv Op. Insert: Activation('softmax');")
+        sys.exit(0)


-    if layer_type == "BatchNormalization":
+    elif layer_type == "BatchNormalization":
       input_var_name = self.getSingleInputName(cur_node)

       inst_str = "void* " + out_var_name1 + " = "

@@ -454,7 +464,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "Add":
+    elif layer_type == "Add":
       input_vars = self.getMultipleInputNames(cur_node)

       inst_str = "void* " + out_var_name1 + " = "
@@ -462,7 +472,7 @@ class TensorRtTranslator:

       self.program_str += inst_str

-    if layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
+    elif layer_type == "MaxPooling2D" or layer_type == "AveragePooling2D":
       input_var_name = self.getSingleInputName(cur_node)

       pool_size = cur_node.pool_size
@@ -481,8 +491,12 @@ class TensorRtTranslator:
         inst_str += "," + str(padding) + "," + str(padding) + "," + str(strides[0]) + "," + str(strides[1])
       inst_str += "); \n"
       self.program_str += inst_str
+      #else:
+      #  print ("ERROR: Operator = ", layer_type, " is NOT currently supported")

-
+
+
+



@@ -622,7 +636,6 @@ class TensorRtTranslator:

     if layer_type == "BatchNormalization":
       weights = layer.get_weights()
-      print ("\n\n ************ len(weights = ", len(weights), "\n\n")

       gamma_w = weights[0]
       gamma_id = layer_name + "_gamma"
diff --git a/llvm/projects/keras/frontend/quantize_utils.py b/llvm/projects/keras/frontend/quantize_utils.py
new file mode 100644
index 0000000000..998bd31ace
--- /dev/null
+++ b/llvm/projects/keras/frontend/quantize_utils.py
@@ -0,0 +1,99 @@
+
+
+from scipy import stats
+from numpy import linalg
+import numpy as np
+import sys
+
+
+# NOTE: enable/disable smart quantization of weights and activations
+smart_quantization = False
+
+
+def quantize_arr(input_arr, min_val, max_val):
+
+  quantize_range = 256.0
+  input_range = max_val - min_val
+  mul_factor = input_range / quantize_range
+
+  v1 = np.subtract(input_arr, min_val)
+  v2 = np.divide(v1, mul_factor)
+  v3 = v2.astype(int)
+  v4 = np.multiply(v3, mul_factor)
+  v5 = np.add(v4, min_val)
+  v6 = np.clip(v5, min_val, max_val)
+
+  return v6
+
+
+def compute_norm(a1, a2):
+
+  norm_inp = np.subtract(a1, a2)
+  #norm = linalg.norm(norm_inp, ord = 1)
+  norm = np.sum(np.abs(norm_inp))
+  print ("*** norm = ", norm)
+
+  return norm
+
+
+def get_best_quant_range(input_arr):
+
+  # For disabled smart quantization, skip expensive quant range computation
+  if smart_quantization == False:
+    min_val = np.percentile(input_arr, 0.1)
+    max_val = np.percentile(input_arr, 99.9)
+    return (min_val, max_val)
+
+
+  # Trying different threshold values for INT8 quantization
+  min_percentiles = [0, 0.1, 0.2, 0.3]
+  max_percentiles = [100, 99.9, 99.8, 99.7]
+
+
+  min_norm = 100000000
+  min_pair = (0, 100)
+  range_vals = (0, 0)
+  for i in min_percentiles:
+    for j in max_percentiles:
+      print (" i = ", i, " j = ", j, " \n")
+      min_val = np.percentile(input_arr, i)
+      max_val = np.percentile(input_arr, j)
+
+      res = quantize_arr(input_arr, min_val, max_val)
+      norm = compute_norm(res, input_arr)
+
+      if norm < min_norm:
+        min_norm = norm
+        min_pair = (i, j)
+        range_vals = (min_val, max_val)
+
+  print ("--- min_norm = ", min_norm, " , min_pair = ", min_pair , " range_vals = ", range_vals)
+
+
+  return range_vals
+
+
+
+
+
+
+if __name__ == "__main__":
+
+
+  vals = np.zeros((2,3))
+  vals[0][0] = 1.2
+  vals[0][1] = 0.48
+  vals[0][2] = 0.5
+  vals[1][0] = -0.3
+  vals[1][1] = 0.25
+  vals[1][2] = 0.46
+
+  input_arr = np.array(vals)
+
+  res = quantize_arr(input_arr, -0.3, 1.4)
+
+  print (res, "\n")
+
+  divergence = compute_norm(res, input_arr)
+
+  print ("divergence = ", divergence, "\n")

--
GitLab
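
Reviewer note (not part of the patch): quantize_utils.py exposes the smart
quantization knob as a module-level flag, so a caller flips it on the module
before asking for a range. A minimal sketch of how the helpers might be
driven, assuming the module is importable as quantize_utils and using a
randomly generated stand-in for a layer's weight tensor:

    import numpy as np
    import quantize_utils

    # Assumed example tensor standing in for a Conv layer's weights.
    weights = np.random.randn(64, 3, 3, 3).astype(np.float32)

    # Knob off (default): fixed 0.1 / 99.9 percentile clipping range.
    lo, hi = quantize_utils.get_best_quant_range(weights)

    # Knob on: sweep the (min, max) percentile pairs and keep the range with
    # the smallest L1 error between original and fake-quantized values.
    quantize_utils.smart_quantization = True
    lo_s, hi_s = quantize_utils.get_best_quant_range(weights)

    # Fake-quantize to 256 levels over the chosen range.
    quantized = quantize_utils.quantize_arr(weights, lo_s, hi_s)
    print("L1 error:", quantize_utils.compute_norm(quantized, weights))

The search is brute force (4 x 4 percentile pairs, each needing a full
quantize-and-compare pass over the tensor), which is presumably why the flag
defaults to False and the cheap fixed percentile range is used unless smart
quantization is explicitly enabled.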
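Reviewer note (not part of the patch): the two new diagnostics ask users to
restructure the Keras model rather than letting the translator miscompile it.
A sketch of what the suggested fixes look like on the model side; the layer
sizes, the input shape, and the padding of (1, 1) are assumptions for
illustration (the padding value the translator prints depends on the layer's
kernel size):

    from keras.models import Sequential
    from keras.layers import Conv2D, ZeroPadding2D, Flatten, Dense, Activation

    model = Sequential()

    # Instead of Conv2D(..., strides=(2, 2), padding='same'), pad explicitly
    # with ZeroPadding2D and use padding='valid', as the new Conv diagnostic
    # suggests ((1, 1) here assumes a 3x3 kernel).
    model.add(ZeroPadding2D(padding=(1, 1), input_shape=(32, 32, 3)))
    model.add(Conv2D(32, (3, 3), strides=(2, 2), padding='valid', activation='relu'))

    model.add(Flatten())

    # Instead of Dense(10, activation='softmax'), keep softmax as a standalone
    # layer, as the new softmax diagnostic requires.
    model.add(Dense(10))
    model.add(Activation('softmax'))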