From 49f391792041be337dd3e5c9487b957559f02a45 Mon Sep 17 00:00:00 2001
From: Hashim Sharif <hsharif3@miranda.cs.illinois.edu>
Date: Wed, 30 Jun 2021 20:54:40 -0500
Subject: [PATCH] HPVM NVDLA backend INT8 mode working with mini-era CNN

---
 .../Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp  | 108 ++++++++++++------
 .../miniera-hpvm/src/miniera-hpvm.cpp         |  46 ++++----
 hpvm/tools/hpvm-clang/main.py.in              |   3 +-
 3 files changed, 96 insertions(+), 61 deletions(-)

diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp
index e8f47ff5a0..977e4b6075 100644
--- a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp
+++ b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp
@@ -1,3 +1,4 @@
+
 #define ENABLE_ASSERTS
 
 #define DEBUG_TYPE "DFG2NVDLA"
@@ -329,6 +330,7 @@ Weights CGT_NVDLA::readTrainedWeights(User *TensorPtr,
 		DEBUG(errs() << "Data file is not found. Aborting.\n");
 		abort();
 	}
+	
 	fseek(file, file_header_size, SEEK_CUR);
 	size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file);
 	DEBUG(errs() << "BYTES READ: " << bytes_read << "\n");
@@ -1380,27 +1382,31 @@ fail:
 }
 
 bool HPVM2NVDLA::runOnModule(Module &M) {
+  
   DEBUG(errs() << "**************HPVM2NVDLA PASS****************\n");
   
   	NvDlaError e = NvDlaError_TestApplicationFailed;
 	TestAppArgs testAppArgs = defaultTestAppArgs;
 	
 	// Get the HPVM IR graph
-  BuildDFG &DFG = getAnalysis<BuildDFG>();
-  std::vector<DFInternalNode *> Roots = DFG.getRoots();
+	BuildDFG &DFG = getAnalysis<BuildDFG>();
+	std::vector<DFInternalNode *> Roots = DFG.getRoots();
 
-  // Visitor for Code Generation Graph Traversal
-  CGT_NVDLA *CGTVisitor = new CGT_NVDLA(M, DFG);
+	// Visitor for Code Generation Graph Traversal
+	CGT_NVDLA *CGTVisitor = new CGT_NVDLA(M, DFG);
 	
   	if(ComputePrecision == "INT8" || ComputePrecision == "int8") {
   		testAppArgs.computePrecision = nvdla::DataType::INT8;
   		testAppArgs.quantizationMode = nvdla::QuantizationMode::PER_KERNEL;
   		testAppArgs.configtarget = std::string("nv_small");
+		errs() << " INT8 mode \n";
   	} else {
   		testAppArgs.computePrecision = nvdla::DataType::HALF;
   		testAppArgs.quantizationMode = nvdla::QuantizationMode::NONE;
   		testAppArgs.configtarget = std::string("nv_full");
+		errs() << "FP16 mode \n";
   	}
+	
 	testAppArgs.profileName = std::string("hpvm-mod");
 	testAppArgs.calibTable = CalibTablePath;//std::string("output_scales.txt");
 	testAppArgs.outputPath = std::string(".");
@@ -1419,7 +1425,9 @@ bool HPVM2NVDLA::runOnModule(Module &M) {
 	return false;
 }
 
+
 NvDlaError CGT_NVDLA::compileProfile(const TestAppArgs* appArgs, TestInfo* i) {
+
     NvDlaError e = NvDlaSuccess;
     std::string profileName = "";
     std::string targetConfigName = "";
@@ -1438,26 +1446,38 @@ NvDlaError CGT_NVDLA::compileProfile(const TestAppArgs* appArgs, TestInfo* i) {
 
     targetConfigName = appArgs->configtarget;
 
+    errs()<<" Compiling NVDLA code *** \n";
+    
     // Determine profile
     PROPAGATE_ERROR_FAIL(generateProfile(appArgs, &profileName, i));
 
+    errs() << "compiling profile \"" << profileName << "\"... config \"" << targetConfigName << "\"...\n";
+    
     // Compile
     DEBUG(NvDlaDebugPrintf("compiling profile \"%s\"... config \"%s\"...\n", profileName.c_str(), targetConfigName.c_str()));
-    PROPAGATE_ERROR_FAIL(compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable));
 
+    //PROPAGATE_ERROR_FAIL(compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable));
+
+    compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable);
+    
     // Get loadable buffer and dump it into a file
     PROPAGATE_ERROR_FAIL(compiler->getLoadableImageSize(profileName.c_str(),
-                                                    &size));
+							&size));
     if (size == 0) {
+	errs() << " ERROR: Invalid size for loadable \n";
         ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter,
                             "Invalid size for a loadable");
     }
 
+    errs() << " size of loadable module = " << size << "\n";
+    
     buffer = (NvU8 *) NvDlaAlloc(size);
     if (buffer == NULL) {
+	errs() << "ERROR: Failed to allocate buffer for loadable \n";
         ORIGINATE_ERROR_FAIL(NvDlaError_InsufficientMemory,
                             "Failed to allocate buffer for loadable");
     }
+    
     PROPAGATE_ERROR_FAIL(compiler->getLoadableImage(profileName.c_str(),
                                                     buffer));
     fileName = profileName + ".nvdla";
@@ -1588,6 +1608,8 @@ fail:
 NvDlaError CGT_NVDLA::generateTensorScales(const TestAppArgs* appArgs, TestInfo* i, nvdla::INetwork* network) {
     NvDlaError e = NvDlaSuccess;
 
+    errs() << " generateTensorScales  \n";
+    
     std::vector<nvdla::ILayer*> networkLayers = network->getLayers();
     std::vector<nvdla::ITensor*> networkInputs = network->getInputs();
 
@@ -1626,13 +1648,16 @@ fail:
     return e;
 }
 
+
+
 NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i, nvdla::INetwork* network) {
-    NvDlaError e = NvDlaSuccess;
+
+  NvDlaError e = NvDlaSuccess;
     NvDlaStatType stat;
-    std::string calibTableFile = /*i->calibTablesPath + "/" + */appArgs->calibTable;
+    std::string calibTableFile = appArgs->calibTable;
 
-    //PROPAGATE_ERROR_FAIL(NvDlaStat(calibTableFile.c_str(), &stat));
-    DEBUG(errs() << "***********READING TENSOR SCALESi*************\n");
+    errs() << "***********READING TENSOR SCALES*************\n";
+    
     std::ifstream infile(calibTableFile.c_str());
     std::string line;
     std::map<std::string, float> LayerNameToScaleMap;
@@ -1643,13 +1668,16 @@ NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i,
         std::string delimiter = ":";
         std::string layer_name = line.substr(0, line.find(delimiter));
         std::string Scale = line.substr(line.find(delimiter) + 1);
-        DEBUG(errs() << "LAYER NAME: " << layer_name << "\n");
-        DEBUG(errs() << "SCALE: " << Scale << "\n");
+
+	errs() << "LAYER NAME: " << layer_name << "\n";
+        errs() << "SCALE: " << Scale << "\n";
+
         size_t size;
         LayerNameToScaleMap[layer_name] = std::stof(Scale, &size);
     }
+    
     infile.close();
-    DEBUG(errs() << "GOT TENSOR SCALES FROM CALIB TABLE\n");
+    errs() << "GOT TENSOR SCALES FROM CALIB TABLE \n";
 
     std::vector<nvdla::ILayer*> networkLayers = network->getLayers();
     std::vector<nvdla::ITensor*> networkInputs = network->getInputs();
@@ -1665,36 +1693,42 @@ NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i,
         PROPAGATE_ERROR_FAIL(Input->setChannelDynamicRange(-1, min, max) );
         const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>("data", scale));
     }
-    DEBUG(errs() << "PER LAYER CALIB\n");
+    
+    errs() << "PER LAYER CALIB  \n";
+    
     for (auto *Layer : networkLayers) {
          NvF32 scale = 0.0f;
-                NvF32 min = 0.0f;
-                NvF32 max = 0.0f;
-		std::string tName = Layer->getName();
-                DEBUG(errs() << "SETTING SCALE FOR LAYER NAME: " << tName << "\n");
-		nvdla::ITensor* outTensor = Layer->getOutput(0);
-                auto it = LayerNameToScaleMap.find(tName);
-                if (it != LayerNameToScaleMap.end()) {
-                        DEBUG(errs() << "SET SCALE FOR NAME: " << tName << "\n");
-			DEBUG(errs() << "SCALE: " << it->second << "\n");
-                        scale = it->second;
-                        min = scale * -127.0f;
-                        max = scale * 127.0f;
-                } else {
-                        DEBUG(errs() << "SET DEFAULT SCALE FOR NAME: " << tName << "\n");
-                        DEBUG(errs() << "SCALE: 1\n");
-			scale = 1;
-                        min = scale * -127.0f;
-                        max = scale * 127.0f;
-                }
-                //else {
-                //      ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Atleast 1 of scale or min-max should be specified for %s\n", tName.c_str());
-                //}
+	 NvF32 min = 0.0f;
+	 NvF32 max = 0.0f;
+
+	 std::string tName = Layer->getName();
+	 errs() << "SETTING SCALE FOR LAYER NAME: " << tName << "\n";
+
+	 nvdla::ITensor* outTensor = Layer->getOutput(0);
+	 auto it = LayerNameToScaleMap.find(tName);
+
+	 if (it != LayerNameToScaleMap.end()) {
+	   errs() << "SET SCALE FOR NAME: " << tName << "\n";
+	   errs() << "SCALE: " << it->second << "\n";
+	   scale = it->second;
+	   min = scale * -127.0f;
+	   max = scale * 127.0f;
+	 } else {
+	   
+	   DEBUG(errs() << "SET DEFAULT SCALE FOR NAME: " << tName << "\n");
+	   DEBUG(errs() << "SCALE: 1\n");
+
+	   scale = 1;
+	   min = scale * -127.0f;
+	   max = scale * 127.0f;
+	 }
+		
         PROPAGATE_ERROR_FAIL( outTensor->setChannelDynamicRange(-1, min, max) );
         const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>(tName, scale));
     }
 
-    DEBUG(errs() << "DONE PARSING CALIBRATION TABLE\n");
+    errs() << "DONE PARSING CALIBRATION TABLE ----- \n";
+    
  fail:
      return e;
 }
diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp
index 3ab28ea74e..9c9099cda1 100644
--- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp
+++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp
@@ -369,29 +369,29 @@ int main(){
   //std::string input_path =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/input_fp16.bin"; 
   std::string labels_path =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/labels_fp16.bin"; 
   //char conv2d_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w_fp16.bin";
-  void* conv2d_1_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w.bin", 0,32,3,3,3); 
-  //char conv2d_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16.bin";
-  void* conv2d_1_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b.bin", 0,1,32, 1, 1);//30,30); 
-  //char conv2d_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16.bin"; 
-  void* conv2d_2_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w.bin", 0,32,32,3,3); 
-  //char conv2d_2_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16.bin"; 
-  void* conv2d_2_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b.bin", 0,1,32, 1, 1);//28,28); 
-  //char conv2d_3_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16.bin"; 
-  void* conv2d_3_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w.bin", 0,64,32,3,3); 
-  //char conv2d_3_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16.bin"; 
-  void* conv2d_3_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b.bin", 0,1,64, 1, 1);//12,12); 
-  //char conv2d_4_w_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16.bin"; 
-  void* conv2d_4_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w.bin", 0,64,64,3,3); 
-  //char conv2d_4_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16.bin"; 
-  void* conv2d_4_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b.bin", 0,1,64, 1, 1);//10,10); 
-  //char dense_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16.bin"; 
-  void* dense_1_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w.bin", 0,1,1,1600,256); 
-  //char dense_1_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16.bin"; 
-  void* dense_1_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b.bin", 0,1,256,1,1); 
-  //char dense_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16.bin"; 
-  void* dense_2_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w.bin", 0,1,1,256,5); 
-  //char dense_2_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16.bin"; 
-  void* dense_2_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b.bin", 0,1,5,1,1); 
+  void* conv2d_1_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w_fp16.bin", 0,32,3,3,3); 
+  //char conv2d_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16_fp16.bin";
+  void* conv2d_1_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16.bin", 0,1,32, 1, 1);//30,30); 
+  //char conv2d_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16_fp16.bin"; 
+  void* conv2d_2_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16.bin", 0,32,32,3,3); 
+  //char conv2d_2_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16_fp16.bin"; 
+  void* conv2d_2_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16.bin", 0,1,32, 1, 1);//28,28); 
+  //char conv2d_3_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16_fp16.bin"; 
+  void* conv2d_3_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16.bin", 0,64,32,3,3); 
+  //char conv2d_3_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16_fp16.bin"; 
+  void* conv2d_3_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16.bin", 0,1,64, 1, 1);//12,12); 
+  //char conv2d_4_w_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16_fp16.bin"; 
+  void* conv2d_4_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16.bin", 0,64,64,3,3); 
+  //char conv2d_4_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16_fp16.bin"; 
+  void* conv2d_4_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16.bin", 0,1,64, 1, 1);//10,10); 
+  //char dense_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16_fp16.bin"; 
+  void* dense_1_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16.bin", 0,1,1,1600,256); 
+  //char dense_1_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16_fp16.bin"; 
+  void* dense_1_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16.bin", 0,1,256,1,1); 
+  //char dense_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16_fp16.bin"; 
+  void* dense_2_w =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16.bin", 0,1,1,256,5); 
+  //char dense_2_b_path[] =  "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16_fp16.bin"; 
+  void* dense_2_b =  readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16.bin", 0,1,5,1,1); 
 
   //void* input = readTrainedWeights(input_path, 0,1,3,32,32); 
   //uint32_t* labels = readLabels3(labels_path, 500); 
diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in
index 0c047de043..7d4d8644e6 100644
--- a/hpvm/tools/hpvm-clang/main.py.in
+++ b/hpvm/tools/hpvm-clang/main.py.in
@@ -60,7 +60,8 @@ def compile_hpvm_c(
         passes += ["LLVMHPVM2NVDLAPass"]
         pass_flags += [
             "hpvm-nvdla",
-            "cprecision=fp16",
+            #"cprecision=fp16",
+            "cprecision=int8",
             "calib-table=calib.txt"
         ]
         
-- 
GitLab