diff --git a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp index e8f47ff5a0b357c76edb3a1d6022e84ac3841197..977e4b6075805d6913fcf7738a27f4f2e17e9c24 100644 --- a/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp +++ b/hpvm/lib/Transforms/HPVM2NVDLA/HPVM2NVDLAPass.cpp @@ -1,3 +1,4 @@ + #define ENABLE_ASSERTS #define DEBUG_TYPE "DFG2NVDLA" @@ -329,6 +330,7 @@ Weights CGT_NVDLA::readTrainedWeights(User *TensorPtr, DEBUG(errs() << "Data file is not found. Aborting.\n"); abort(); } + fseek(file, file_header_size, SEEK_CUR); size_t bytes_read = fread(tensor_data, 1, size_in_bytes, file); DEBUG(errs() << "BYTES READ: " << bytes_read << "\n"); @@ -1380,27 +1382,31 @@ fail: } bool HPVM2NVDLA::runOnModule(Module &M) { + DEBUG(errs() << "**************HPVM2NVDLA PASS****************\n"); NvDlaError e = NvDlaError_TestApplicationFailed; TestAppArgs testAppArgs = defaultTestAppArgs; // Get the HPVM IR graph - BuildDFG &DFG = getAnalysis<BuildDFG>(); - std::vector<DFInternalNode *> Roots = DFG.getRoots(); + BuildDFG &DFG = getAnalysis<BuildDFG>(); + std::vector<DFInternalNode *> Roots = DFG.getRoots(); - // Visitor for Code Generation Graph Traversal - CGT_NVDLA *CGTVisitor = new CGT_NVDLA(M, DFG); + // Visitor for Code Generation Graph Traversal + CGT_NVDLA *CGTVisitor = new CGT_NVDLA(M, DFG); if(ComputePrecision == "INT8" || ComputePrecision == "int8") { testAppArgs.computePrecision = nvdla::DataType::INT8; testAppArgs.quantizationMode = nvdla::QuantizationMode::PER_KERNEL; testAppArgs.configtarget = std::string("nv_small"); + errs() << " INT8 mode \n"; } else { testAppArgs.computePrecision = nvdla::DataType::HALF; testAppArgs.quantizationMode = nvdla::QuantizationMode::NONE; testAppArgs.configtarget = std::string("nv_full"); + errs() << "FP16 mode \n"; } + testAppArgs.profileName = std::string("hpvm-mod"); testAppArgs.calibTable = CalibTablePath;//std::string("output_scales.txt"); testAppArgs.outputPath = std::string("."); @@ -1419,7 +1425,9 @@ bool HPVM2NVDLA::runOnModule(Module &M) { return false; } + NvDlaError CGT_NVDLA::compileProfile(const TestAppArgs* appArgs, TestInfo* i) { + NvDlaError e = NvDlaSuccess; std::string profileName = ""; std::string targetConfigName = ""; @@ -1438,26 +1446,38 @@ NvDlaError CGT_NVDLA::compileProfile(const TestAppArgs* appArgs, TestInfo* i) { targetConfigName = appArgs->configtarget; + errs()<<" Compiling NVDLA code *** \n"; + // Determine profile PROPAGATE_ERROR_FAIL(generateProfile(appArgs, &profileName, i)); + errs() << "compiling profile \"%s\"... config \"%s\"...\n" << profileName.c_str() << targetConfigName.c_str() <<"\n"; + // Compile DEBUG(NvDlaDebugPrintf("compiling profile \"%s\"... config \"%s\"...\n", profileName.c_str(), targetConfigName.c_str())); - PROPAGATE_ERROR_FAIL(compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable)); + //PROPAGATE_ERROR_FAIL(compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable)); + + compiler->compile(profileName.c_str(), targetConfigName.c_str(), &i->compiledLoadable); + // Get loadable buffer and dump it into a file PROPAGATE_ERROR_FAIL(compiler->getLoadableImageSize(profileName.c_str(), - &size)); + &size)); if (size == 0) { ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Invalid size for a loadable"); + errs()<<" ERROR: Invalid size for loadable \n"; } + errs()<<" size of loadable module = " << size; + buffer = (NvU8 *) NvDlaAlloc(size); if (buffer == NULL) { ORIGINATE_ERROR_FAIL(NvDlaError_InsufficientMemory, "Failed to allocate buffer for loadable"); + errs() << "ERROR: Failed to allocate buffer or loadable \n"; } + PROPAGATE_ERROR_FAIL(compiler->getLoadableImage(profileName.c_str(), buffer)); fileName = profileName + ".nvdla"; @@ -1588,6 +1608,8 @@ fail: NvDlaError CGT_NVDLA::generateTensorScales(const TestAppArgs* appArgs, TestInfo* i, nvdla::INetwork* network) { NvDlaError e = NvDlaSuccess; + errs() << " generateTenorScales \n"; + std::vector<nvdla::ILayer*> networkLayers = network->getLayers(); std::vector<nvdla::ITensor*> networkInputs = network->getInputs(); @@ -1626,13 +1648,16 @@ fail: return e; } + + NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i, nvdla::INetwork* network) { - NvDlaError e = NvDlaSuccess; + + NvDlaError e = NvDlaSuccess; NvDlaStatType stat; - std::string calibTableFile = /*i->calibTablesPath + "/" + */appArgs->calibTable; + std::string calibTableFile = appArgs->calibTable; - //PROPAGATE_ERROR_FAIL(NvDlaStat(calibTableFile.c_str(), &stat)); - DEBUG(errs() << "***********READING TENSOR SCALESi*************\n"); + errs() << "***********READING TENSOR SCALESi*************\n"; + std::ifstream infile(calibTableFile.c_str()); std::string line; std::map<std::string, float> LayerNameToScaleMap; @@ -1643,13 +1668,16 @@ NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i, std::string delimiter = ":"; std::string layer_name = line.substr(0, line.find(delimiter)); std::string Scale = line.substr(line.find(delimiter) + 1); - DEBUG(errs() << "LAYER NAME: " << layer_name << "\n"); - DEBUG(errs() << "SCALE: " << Scale << "\n"); + + errs() << "LAYER NAME: " << layer_name << "\n"; + errs() << "SCALE: " << Scale << "\n"; + size_t size; LayerNameToScaleMap[layer_name] = std::stof(Scale, &size); } + infile.close(); - DEBUG(errs() << "GOT TENSOR SCALES FROM CALIB TABLE\n"); + errs() << "GOT TENSOR SCALES FROM CALIB TABLE \n"; std::vector<nvdla::ILayer*> networkLayers = network->getLayers(); std::vector<nvdla::ITensor*> networkInputs = network->getInputs(); @@ -1665,36 +1693,42 @@ NvDlaError CGT_NVDLA::readTensorScales(const TestAppArgs* appArgs, TestInfo *i, PROPAGATE_ERROR_FAIL(Input->setChannelDynamicRange(-1, min, max) ); const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>("data", scale)); } - DEBUG(errs() << "PER LAYER CALIB\n"); + + errs() << "PER LAYER CALIB \n"; + for (auto *Layer : networkLayers) { NvF32 scale = 0.0f; - NvF32 min = 0.0f; - NvF32 max = 0.0f; - std::string tName = Layer->getName(); - DEBUG(errs() << "SETTING SCALE FOR LAYER NAME: " << tName << "\n"); - nvdla::ITensor* outTensor = Layer->getOutput(0); - auto it = LayerNameToScaleMap.find(tName); - if (it != LayerNameToScaleMap.end()) { - DEBUG(errs() << "SET SCALE FOR NAME: " << tName << "\n"); - DEBUG(errs() << "SCALE: " << it->second << "\n"); - scale = it->second; - min = scale * -127.0f; - max = scale * 127.0f; - } else { - DEBUG(errs() << "SET DEFAULT SCALE FOR NAME: " << tName << "\n"); - DEBUG(errs() << "SCALE: 1\n"); - scale = 1; - min = scale * -127.0f; - max = scale * 127.0f; - } - //else { - // ORIGINATE_ERROR_FAIL(NvDlaError_BadParameter, "Atleast 1 of scale or min-max should be specified for %s\n", tName.c_str()); - //} + NvF32 min = 0.0f; + NvF32 max = 0.0f; + + std::string tName = Layer->getName(); + errs() << "SETTING SCALE FOR LAYER NAME: " << tName << "\n"; + + nvdla::ITensor* outTensor = Layer->getOutput(0); + auto it = LayerNameToScaleMap.find(tName); + + if (it != LayerNameToScaleMap.end()) { + errs() << "SET SCALE FOR NAME: " << tName << "\n"; + errs() << "SCALE: " << it->second << "\n"; + scale = it->second; + min = scale * -127.0f; + max = scale * 127.0f; + } else { + + DEBUG(errs() << "SET DEFAULT SCALE FOR NAME: " << tName << "\n"); + DEBUG(errs() << "SCALE: 1\n"); + + scale = 1; + min = scale * -127.0f; + max = scale * 127.0f; + } + PROPAGATE_ERROR_FAIL( outTensor->setChannelDynamicRange(-1, min, max) ); const_cast<TestAppArgs*>(appArgs)->tensorScales.insert(std::pair<std::string, NvF32>(tName, scale)); } - DEBUG(errs() << "DONE PARSING CALIBRATION TABLE\n"); + errs() << "DONE PARSING CALIBRATION TABLE ----- \n"; + fail: return e; } diff --git a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp index 3ab28ea74edd80e9850b3ee95370b7b99d5f30c1..9c9099cda1afaa4b4ba8812ab5ef84729b12c667 100644 --- a/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp +++ b/hpvm/test/dnn_benchmarks/hpvm-c/benchmarks/miniera-hpvm/src/miniera-hpvm.cpp @@ -369,29 +369,29 @@ int main(){ //std::string input_path = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/input_fp16.bin"; std::string labels_path = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/labels_fp16.bin"; //char conv2d_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w_fp16.bin"; - void* conv2d_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w.bin", 0,32,3,3,3); - //char conv2d_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16.bin"; - void* conv2d_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b.bin", 0,1,32, 1, 1);//30,30); - //char conv2d_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16.bin"; - void* conv2d_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w.bin", 0,32,32,3,3); - //char conv2d_2_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16.bin"; - void* conv2d_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b.bin", 0,1,32, 1, 1);//28,28); - //char conv2d_3_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16.bin"; - void* conv2d_3_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w.bin", 0,64,32,3,3); - //char conv2d_3_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16.bin"; - void* conv2d_3_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b.bin", 0,1,64, 1, 1);//12,12); - //char conv2d_4_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16.bin"; - void* conv2d_4_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w.bin", 0,64,64,3,3); - //char conv2d_4_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16.bin"; - void* conv2d_4_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b.bin", 0,1,64, 1, 1);//10,10); - //char dense_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16.bin"; - void* dense_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w.bin", 0,1,1,1600,256); - //char dense_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16.bin"; - void* dense_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b.bin", 0,1,256,1,1); - //char dense_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16.bin"; - void* dense_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w.bin", 0,1,1,256,5); - //char dense_2_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16.bin"; - void* dense_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b.bin", 0,1,5,1,1); + void* conv2d_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_w_fp16.bin", 0,32,3,3,3); + //char conv2d_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16_fp16.bin"; + void* conv2d_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_1_b_fp16.bin", 0,1,32, 1, 1);//30,30); + //char conv2d_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16_fp16.bin"; + void* conv2d_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_w_fp16.bin", 0,32,32,3,3); + //char conv2d_2_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16_fp16.bin"; + void* conv2d_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_2_b_fp16.bin", 0,1,32, 1, 1);//28,28); + //char conv2d_3_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16_fp16.bin"; + void* conv2d_3_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_w_fp16.bin", 0,64,32,3,3); + //char conv2d_3_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16_fp16.bin"; + void* conv2d_3_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_3_b_fp16.bin", 0,1,64, 1, 1);//12,12); + //char conv2d_4_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16_fp16.bin"; + void* conv2d_4_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_w_fp16.bin", 0,64,64,3,3); + //char conv2d_4_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16_fp16.bin"; + void* conv2d_4_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/conv2d_4_b_fp16.bin", 0,1,64, 1, 1);//10,10); + //char dense_1_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16_fp16.bin"; + void* dense_1_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_w_fp16.bin", 0,1,1,1600,256); + //char dense_1_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16_fp16.bin"; + void* dense_1_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_1_b_fp16.bin", 0,1,256,1,1); + //char dense_2_w_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16_fp16.bin"; + void* dense_2_w = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_w_fp16.bin", 0,1,1,256,5); + //char dense_2_b_path[] = "../../../../../projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16_fp16.bin"; + void* dense_2_b = readTrainedWeights("/home/hsharif3/Gitlab/old_hpvm_nvdla/hpvm/llvm/projects/hpvm-tensor-rt/model_params/legacy/hpvm_mio/dense_2_b_fp16.bin", 0,1,5,1,1); //void* input = readTrainedWeights(input_path, 0,1,3,32,32); //uint32_t* labels = readLabels3(labels_path, 500); diff --git a/hpvm/tools/hpvm-clang/main.py.in b/hpvm/tools/hpvm-clang/main.py.in index 0c047de0437285b84227a0f4ef7614761a2daed2..7d4d8644e687cf4a810e49a1b7bb0b4b294b7768 100644 --- a/hpvm/tools/hpvm-clang/main.py.in +++ b/hpvm/tools/hpvm-clang/main.py.in @@ -60,7 +60,8 @@ def compile_hpvm_c( passes += ["LLVMHPVM2NVDLAPass"] pass_flags += [ "hpvm-nvdla", - "cprecision=fp16", + #"cprecision=fp16", + "cprecision=int8", "calib-table=calib.txt" ]